]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
[YoutubeDL] format spec: correctly handle dashes and other unused operators
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
9d2ecdbc 8import datetime
c1c9a79c 9import errno
31bd3925 10import fileinput
8222d8de 11import io
b82f815f 12import itertools
8694c600 13import json
62fec3b2 14import locale
083c9df9 15import operator
8222d8de 16import os
dca08720 17import platform
8222d8de
JMF
18import re
19import shutil
dca08720 20import subprocess
8222d8de
JMF
21import socket
22import sys
23import time
67134eab 24import tokenize
8222d8de
JMF
25import traceback
26
1e5b9a95
PH
27if os.name == 'nt':
28 import ctypes
29
8c25f81b 30from .compat import (
8f9312c3 31 compat_basestring,
dca08720 32 compat_cookiejar,
4644ac55 33 compat_expanduser,
003c69a8 34 compat_get_terminal_size,
ce02ed60 35 compat_http_client,
4f026faf 36 compat_kwargs,
ce02ed60 37 compat_str,
67134eab 38 compat_tokenize_tokenize,
ce02ed60
PH
39 compat_urllib_error,
40 compat_urllib_request,
8c25f81b
PH
41)
42from .utils import (
d05cfe06 43 escape_url,
ce02ed60
PH
44 ContentTooShortError,
45 date_from_str,
46 DateRange,
acd69589 47 DEFAULT_OUTTMPL,
ce02ed60
PH
48 determine_ext,
49 DownloadError,
50 encodeFilename,
51 ExtractorError,
02dbf93f 52 format_bytes,
525ef922 53 formatSeconds,
931bc3c3 54 HEADRequest,
ce02ed60 55 locked_file,
dca08720 56 make_HTTPS_handler,
ce02ed60 57 MaxDownloadsReached,
b7ab0590 58 PagedList,
083c9df9 59 parse_filesize,
91410c9b 60 PerRequestProxyHandler,
ce02ed60 61 PostProcessingError,
dca08720 62 platform_name,
ce02ed60 63 preferredencoding,
cfb56d1a 64 render_table,
ce02ed60
PH
65 SameFileError,
66 sanitize_filename,
1bb5c511 67 sanitize_path,
e5660ee6 68 std_headers,
ce02ed60 69 subtitles_filename,
ce02ed60 70 UnavailableVideoError,
29eb5174 71 url_basename,
58b1f00d 72 version_tuple,
ce02ed60
PH
73 write_json_file,
74 write_string,
dca08720 75 YoutubeDLHandler,
6350728b 76 prepend_extension,
b29e0000 77 replace_extension,
7d4111ed 78 args_to_str,
05900629 79 age_restricted,
ce02ed60 80)
a0e07d31 81from .cache import Cache
023fa8c4 82from .extractor import get_info_extractor, gen_extractors
3bc2ddcc 83from .downloader import get_suitable_downloader
4c83c967 84from .downloader.rtmp import rtmpdump_version
4f026faf 85from .postprocessor import (
62cd676c 86 FFmpegFixupM4aPP,
6271f1ca 87 FFmpegFixupStretchedPP,
4f026faf
PH
88 FFmpegMergerPP,
89 FFmpegPostProcessor,
90 get_postprocessor,
91)
dca08720 92from .version import __version__
8222d8de
JMF
93
94
95class YoutubeDL(object):
96 """YoutubeDL class.
97
98 YoutubeDL objects are the ones responsible of downloading the
99 actual video file and writing it to disk if the user has requested
100 it, among some other tasks. In most cases there should be one per
101 program. As, given a video URL, the downloader doesn't know how to
102 extract all the needed information, task that InfoExtractors do, it
103 has to pass the URL to one of them.
104
105 For this, YoutubeDL objects have a method that allows
106 InfoExtractors to be registered in a given order. When it is passed
107 a URL, the YoutubeDL object handles it to the first InfoExtractor it
108 finds that reports being able to handle it. The InfoExtractor extracts
109 all the information about the video or videos the URL refers to, and
110 YoutubeDL process the extracted information, possibly using a File
111 Downloader to download the video.
112
113 YoutubeDL objects accept a lot of parameters. In order not to saturate
114 the object constructor with arguments, it receives a dictionary of
115 options instead. These options are available through the params
116 attribute for the InfoExtractors to use. The YoutubeDL also
117 registers itself as the downloader in charge for the InfoExtractors
118 that are added to it, so this is a "mutual registration".
119
120 Available options:
121
122 username: Username for authentication purposes.
123 password: Password for authentication purposes.
180940e0 124 videopassword: Password for accessing a video.
8222d8de
JMF
125 usenetrc: Use netrc for authentication instead.
126 verbose: Print additional info to stdout.
127 quiet: Do not print messages to stdout.
ad8915b7 128 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
129 forceurl: Force printing final URL.
130 forcetitle: Force printing title.
131 forceid: Force printing ID.
132 forcethumbnail: Force printing thumbnail URL.
133 forcedescription: Force printing description.
134 forcefilename: Force printing final filename.
525ef922 135 forceduration: Force printing duration.
8694c600 136 forcejson: Force printing info_dict as JSON.
63e0be34
PH
137 dump_single_json: Force printing the info_dict of the whole playlist
138 (or video) as a single JSON line.
8222d8de 139 simulate: Do not download the video files.
d8600787 140 format: Video format code. See options.py for more information.
8222d8de
JMF
141 outtmpl: Template for output names.
142 restrictfilenames: Do not allow "&" and spaces in file names
143 ignoreerrors: Do not stop on download errors.
d22dec74 144 force_generic_extractor: Force downloader to use the generic extractor
8222d8de
JMF
145 nooverwrites: Prevent overwriting files.
146 playliststart: Playlist item to start at.
147 playlistend: Playlist item to end at.
c14e88f0 148 playlist_items: Specific indices of playlist to download.
ff815fe6 149 playlistreverse: Download playlist items in reverse order.
8222d8de
JMF
150 matchtitle: Download only matching titles.
151 rejecttitle: Reject downloads for matching titles.
8bf9319e 152 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
153 logtostderr: Log messages to stderr instead of stdout.
154 writedescription: Write the video description to a .description file
155 writeinfojson: Write the video description to a .info.json file
1fb07d10 156 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 157 writethumbnail: Write the thumbnail image to a file
ec82d85a 158 write_all_thumbnails: Write all thumbnail formats to files
8222d8de 159 writesubtitles: Write the video subtitles to a file
b004821f 160 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 161 allsubtitles: Downloads all the subtitles of the video
0b7f3118 162 (requires writesubtitles or writeautomaticsub)
8222d8de 163 listsubtitles: Lists all available subtitles for the video
a504ced0 164 subtitlesformat: The format code for subtitles
aa6a10c4 165 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
166 keepvideo: Keep the video file after post-processing
167 daterange: A DateRange object, download only if the upload_date is in the range.
168 skip_download: Skip the actual download of the video file
c35f9e72 169 cachedir: Location of the cache files in the filesystem.
a0e07d31 170 False to disable filesystem cache.
47192f92 171 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
172 age_limit: An integer representing the user's age in years.
173 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
174 min_views: An integer representing the minimum view count the video
175 must have in order to not be skipped.
176 Videos without view count information are always
177 downloaded. None for no limit.
178 max_views: An integer representing the maximum view count.
179 Videos that are more popular than that are not
180 downloaded.
181 Videos without view count information are always
182 downloaded. None for no limit.
183 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
184 Videos already present in the file are not downloaded
185 again.
dca08720 186 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 187 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
188 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
189 At the moment, this is only supported by YouTube.
a1ee09e8 190 proxy: URL of the proxy server to use
91410c9b
PH
191 cn_verification_proxy: URL of the proxy to use for IP address verification
192 on Chinese sites. (Experimental)
e344693b 193 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
194 bidi_workaround: Work around buggy terminals without bidirectional text
195 support, using fridibi
a0ddb8a2 196 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 197 include_ads: Download ads as well
04b4d394
PH
198 default_search: Prepend this string if an input url is not valid.
199 'auto' for elaborate guessing
62fec3b2 200 encoding: Use this encoding instead of the system-specified.
e8ee972c 201 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
202 Pass in 'in_playlist' to only show this behavior for
203 playlist items.
4f026faf 204 postprocessors: A list of dictionaries, each with an entry
71b640cc
PH
205 * key: The name of the postprocessor. See
206 youtube_dl/postprocessor/__init__.py for a list.
4f026faf
PH
207 as well as any further keyword arguments for the
208 postprocessor.
71b640cc
PH
209 progress_hooks: A list of functions that get called on download
210 progress, with a dictionary with the entries
5cda4eda 211 * status: One of "downloading", "error", or "finished".
ee69b99a 212 Check this first and ignore unknown values.
71b640cc 213
5cda4eda 214 If status is one of "downloading", or "finished", the
ee69b99a
PH
215 following properties may also be present:
216 * filename: The final filename (always present)
5cda4eda 217 * tmpfilename: The filename we're currently writing to
71b640cc
PH
218 * downloaded_bytes: Bytes on disk
219 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
220 * total_bytes_estimate: Guess of the eventual file size,
221 None if unavailable.
222 * elapsed: The number of seconds since download started.
71b640cc
PH
223 * eta: The estimated time in seconds, None if unknown
224 * speed: The download speed in bytes/second, None if
225 unknown
5cda4eda
PH
226 * fragment_index: The counter of the currently
227 downloaded video fragment.
228 * fragment_count: The number of fragments (= individual
229 files that will be merged)
71b640cc
PH
230
231 Progress hooks are guaranteed to be called at least once
232 (with status "finished") if the download is successful.
45598f15 233 merge_output_format: Extension to use when merging formats.
6271f1ca
PH
234 fixup: Automatically correct known faults of the file.
235 One of:
236 - "never": do nothing
237 - "warn": only emit a warning
238 - "detect_or_warn": check whether we can do anything
62cd676c 239 about it, warn otherwise (default)
be4a824d 240 source_address: (Experimental) Client-side IP address to bind to.
8bfa7545
PH
241 call_home: Boolean, true iff we are allowed to contact the
242 youtube-dl servers for debugging.
5f0d813d 243 sleep_interval: Number of seconds to sleep before each download.
cfb56d1a
PH
244 listformats: Print an overview of available video formats and exit.
245 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
246 match_filter: A function that gets called with the info_dict of
247 every video.
248 If it returns a message, the video is ignored.
249 If it returns None, the video is downloaded.
250 match_filter_func in utils.py is one example for this.
7e5db8c9 251 no_color: Do not emit color codes in output.
71b640cc 252
85729c51
PH
253 The following options determine which downloader is picked:
254 external_downloader: Executable of the external downloader to call.
255 None or unset for standard (built-in) downloader.
256 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
fe7e0c98 257
8222d8de 258 The following parameters are not used by YoutubeDL itself, they are used by
c75f0b36 259 the downloader (see youtube_dl/downloader/common.py):
8222d8de 260 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 261 noresizebuffer, retries, continuedl, noprogress, consoletitle,
c75f0b36 262 xattr_set_filesize, external_downloader_args.
76b1bd67
JMF
263
264 The following options are used by the post processors:
265 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
266 otherwise prefer avconv.
f72b0a60
S
267 postprocessor_args: A list of additional command-line arguments for the
268 postprocessor.
8222d8de
JMF
269 """
270
271 params = None
272 _ies = []
273 _pps = []
274 _download_retcode = None
275 _num_downloads = None
276 _screen_file = None
277
3511266b 278 def __init__(self, params=None, auto_init=True):
8222d8de 279 """Create a FileDownloader object with the given options."""
e9f9a10f
JMF
280 if params is None:
281 params = {}
8222d8de 282 self._ies = []
56c73665 283 self._ies_instances = {}
8222d8de 284 self._pps = []
933605d7 285 self._progress_hooks = []
8222d8de
JMF
286 self._download_retcode = 0
287 self._num_downloads = 0
288 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
0783b09b 289 self._err_file = sys.stderr
e9f9a10f 290 self.params = params
a0e07d31 291 self.cache = Cache(self)
34308b30 292
0783b09b 293 if params.get('bidi_workaround', False):
1c088fa8
PH
294 try:
295 import pty
296 master, slave = pty.openpty()
003c69a8 297 width = compat_get_terminal_size().columns
1c088fa8
PH
298 if width is None:
299 width_args = []
300 else:
301 width_args = ['-w', str(width)]
5d681e96 302 sp_kwargs = dict(
1c088fa8
PH
303 stdin=subprocess.PIPE,
304 stdout=slave,
305 stderr=self._err_file)
5d681e96
PH
306 try:
307 self._output_process = subprocess.Popen(
308 ['bidiv'] + width_args, **sp_kwargs
309 )
310 except OSError:
5d681e96
PH
311 self._output_process = subprocess.Popen(
312 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
313 self._output_channel = os.fdopen(master, 'rb')
1c088fa8
PH
314 except OSError as ose:
315 if ose.errno == 2:
6febd1c1 316 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
1c088fa8
PH
317 else:
318 raise
0783b09b 319
34308b30 320 if (sys.version_info >= (3,) and sys.platform != 'win32' and
8fb3ac36
PH
321 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
322 not params.get('restrictfilenames', False)):
34308b30
PH
323 # On Python 3, the Unicode filesystem API will throw errors (#1474)
324 self.report_warning(
6febd1c1 325 'Assuming --restrict-filenames since file system encoding '
1b725173 326 'cannot encode all characters. '
6febd1c1 327 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 328 self.params['restrictfilenames'] = True
34308b30 329
486dd09e
PH
330 if isinstance(params.get('outtmpl'), bytes):
331 self.report_warning(
332 'Parameter outtmpl is bytes, but should be a unicode string. '
333 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
334
dca08720
PH
335 self._setup_opener()
336
3511266b
PH
337 if auto_init:
338 self.print_debug_header()
339 self.add_default_info_extractors()
340
4f026faf
PH
341 for pp_def_raw in self.params.get('postprocessors', []):
342 pp_class = get_postprocessor(pp_def_raw['key'])
343 pp_def = dict(pp_def_raw)
344 del pp_def['key']
345 pp = pp_class(self, **compat_kwargs(pp_def))
346 self.add_post_processor(pp)
347
71b640cc
PH
348 for ph in self.params.get('progress_hooks', []):
349 self.add_progress_hook(ph)
350
7d4111ed
PH
351 def warn_if_short_id(self, argv):
352 # short YouTube ID starting with dash?
353 idxs = [
354 i for i, a in enumerate(argv)
355 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
356 if idxs:
357 correct_argv = (
358 ['youtube-dl'] +
359 [a for i, a in enumerate(argv) if i not in idxs] +
360 ['--'] + [argv[i] for i in idxs]
361 )
362 self.report_warning(
363 'Long argument string detected. '
364 'Use -- to separate parameters and URLs, like this:\n%s\n' %
365 args_to_str(correct_argv))
366
8222d8de
JMF
367 def add_info_extractor(self, ie):
368 """Add an InfoExtractor object to the end of the list."""
369 self._ies.append(ie)
56c73665 370 self._ies_instances[ie.ie_key()] = ie
8222d8de
JMF
371 ie.set_downloader(self)
372
56c73665
JMF
373 def get_info_extractor(self, ie_key):
374 """
375 Get an instance of an IE with name ie_key, it will try to get one from
376 the _ies list, if there's no instance it will create a new one and add
377 it to the extractor list.
378 """
379 ie = self._ies_instances.get(ie_key)
380 if ie is None:
381 ie = get_info_extractor(ie_key)()
382 self.add_info_extractor(ie)
383 return ie
384
023fa8c4
JMF
385 def add_default_info_extractors(self):
386 """
387 Add the InfoExtractors returned by gen_extractors to the end of the list
388 """
389 for ie in gen_extractors():
390 self.add_info_extractor(ie)
391
8222d8de
JMF
392 def add_post_processor(self, pp):
393 """Add a PostProcessor object to the end of the chain."""
394 self._pps.append(pp)
395 pp.set_downloader(self)
396
933605d7
JMF
397 def add_progress_hook(self, ph):
398 """Add the progress hook (currently only for the file downloader)"""
399 self._progress_hooks.append(ph)
8ab470f1 400
1c088fa8 401 def _bidi_workaround(self, message):
5d681e96 402 if not hasattr(self, '_output_channel'):
1c088fa8
PH
403 return message
404
5d681e96 405 assert hasattr(self, '_output_process')
11b85ce6 406 assert isinstance(message, compat_str)
6febd1c1
PH
407 line_count = message.count('\n') + 1
408 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 409 self._output_process.stdin.flush()
6febd1c1 410 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 411 for _ in range(line_count))
6febd1c1 412 return res[:-len('\n')]
1c088fa8 413
8222d8de 414 def to_screen(self, message, skip_eol=False):
0783b09b
PH
415 """Print message to stdout if not in quiet mode."""
416 return self.to_stdout(message, skip_eol, check_quiet=True)
417
734f90bb 418 def _write_string(self, s, out=None):
b58ddb32 419 write_string(s, out=out, encoding=self.params.get('encoding'))
734f90bb 420
0783b09b 421 def to_stdout(self, message, skip_eol=False, check_quiet=False):
8222d8de 422 """Print message to stdout if not in quiet mode."""
8bf9319e 423 if self.params.get('logger'):
43afe285 424 self.params['logger'].debug(message)
0783b09b 425 elif not check_quiet or not self.params.get('quiet', False):
1c088fa8 426 message = self._bidi_workaround(message)
6febd1c1 427 terminator = ['\n', ''][skip_eol]
8222d8de 428 output = message + terminator
1c088fa8 429
734f90bb 430 self._write_string(output, self._screen_file)
8222d8de
JMF
431
432 def to_stderr(self, message):
433 """Print message to stderr."""
11b85ce6 434 assert isinstance(message, compat_str)
8bf9319e 435 if self.params.get('logger'):
43afe285
IB
436 self.params['logger'].error(message)
437 else:
1c088fa8 438 message = self._bidi_workaround(message)
6febd1c1 439 output = message + '\n'
734f90bb 440 self._write_string(output, self._err_file)
8222d8de 441
1e5b9a95
PH
442 def to_console_title(self, message):
443 if not self.params.get('consoletitle', False):
444 return
445 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
446 # c_wchar_p() might not be necessary if `message` is
447 # already of type unicode()
448 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
449 elif 'TERM' in os.environ:
734f90bb 450 self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 451
bdde425c
PH
452 def save_console_title(self):
453 if not self.params.get('consoletitle', False):
454 return
455 if 'TERM' in os.environ:
efd6c574 456 # Save the title on stack
734f90bb 457 self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
458
459 def restore_console_title(self):
460 if not self.params.get('consoletitle', False):
461 return
462 if 'TERM' in os.environ:
efd6c574 463 # Restore the title from stack
734f90bb 464 self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
465
466 def __enter__(self):
467 self.save_console_title()
468 return self
469
470 def __exit__(self, *args):
471 self.restore_console_title()
f89197d7 472
dca08720
PH
473 if self.params.get('cookiefile') is not None:
474 self.cookiejar.save()
bdde425c 475
8222d8de
JMF
476 def trouble(self, message=None, tb=None):
477 """Determine action to take when a download problem appears.
478
479 Depending on if the downloader has been configured to ignore
480 download errors or not, this method may throw an exception or
481 not when errors are found, after printing the message.
482
483 tb, if given, is additional traceback information.
484 """
485 if message is not None:
486 self.to_stderr(message)
487 if self.params.get('verbose'):
488 if tb is None:
489 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 490 tb = ''
8222d8de 491 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 492 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
8222d8de
JMF
493 tb += compat_str(traceback.format_exc())
494 else:
495 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 496 tb = ''.join(tb_data)
8222d8de
JMF
497 self.to_stderr(tb)
498 if not self.params.get('ignoreerrors', False):
499 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
500 exc_info = sys.exc_info()[1].exc_info
501 else:
502 exc_info = sys.exc_info()
503 raise DownloadError(message, exc_info)
504 self._download_retcode = 1
505
506 def report_warning(self, message):
507 '''
508 Print the message to stderr, it will be prefixed with 'WARNING:'
509 If stderr is a tty file the 'WARNING:' will be colored
510 '''
6d07ce01
JMF
511 if self.params.get('logger') is not None:
512 self.params['logger'].warning(message)
8222d8de 513 else:
ad8915b7
PH
514 if self.params.get('no_warnings'):
515 return
7e5db8c9 516 if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
6d07ce01
JMF
517 _msg_header = '\033[0;33mWARNING:\033[0m'
518 else:
519 _msg_header = 'WARNING:'
520 warning_message = '%s %s' % (_msg_header, message)
521 self.to_stderr(warning_message)
8222d8de
JMF
522
523 def report_error(self, message, tb=None):
524 '''
525 Do the same as trouble, but prefixes the message with 'ERROR:', colored
526 in red if stderr is a tty file.
527 '''
7e5db8c9 528 if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
6febd1c1 529 _msg_header = '\033[0;31mERROR:\033[0m'
8222d8de 530 else:
6febd1c1
PH
531 _msg_header = 'ERROR:'
532 error_message = '%s %s' % (_msg_header, message)
8222d8de
JMF
533 self.trouble(error_message, tb)
534
8222d8de
JMF
535 def report_file_already_downloaded(self, file_name):
536 """Report file has already been fully downloaded."""
537 try:
6febd1c1 538 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 539 except UnicodeEncodeError:
6febd1c1 540 self.to_screen('[download] The file has already been downloaded')
8222d8de 541
8222d8de
JMF
542 def prepare_filename(self, info_dict):
543 """Generate the output filename."""
544 try:
545 template_dict = dict(info_dict)
546
547 template_dict['epoch'] = int(time.time())
548 autonumber_size = self.params.get('autonumber_size')
549 if autonumber_size is None:
550 autonumber_size = 5
6febd1c1 551 autonumber_templ = '%0' + str(autonumber_size) + 'd'
8222d8de 552 template_dict['autonumber'] = autonumber_templ % self._num_downloads
702665c0 553 if template_dict.get('playlist_index') is not None:
c6b4132a 554 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
17b75c0d
PH
555 if template_dict.get('resolution') is None:
556 if template_dict.get('width') and template_dict.get('height'):
557 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
558 elif template_dict.get('height'):
805ef3c6 559 template_dict['resolution'] = '%sp' % template_dict['height']
17b75c0d 560 elif template_dict.get('width'):
805ef3c6 561 template_dict['resolution'] = '?x%d' % template_dict['width']
8222d8de 562
586a91b6 563 sanitize = lambda k, v: sanitize_filename(
45598aab 564 compat_str(v),
1bb5c511 565 restricted=self.params.get('restrictfilenames'),
6febd1c1 566 is_id=(k == 'id'))
586a91b6 567 template_dict = dict((k, sanitize(k, v))
45598aab
PH
568 for k, v in template_dict.items()
569 if v is not None)
6febd1c1 570 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
8222d8de 571
1bb5c511 572 outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
4644ac55 573 tmpl = compat_expanduser(outtmpl)
586a91b6 574 filename = tmpl % template_dict
3a0d2f52
S
575 # Temporary fix for #4787
576 # 'Treat' all problem characters by passing filename through preferredencoding
577 # to workaround encoding issues with subprocess on python2 @ Windows
578 if sys.version_info < (3, 0) and sys.platform == 'win32':
579 filename = encodeFilename(filename, True).decode(preferredencoding())
8222d8de 580 return filename
8222d8de 581 except ValueError as err:
6febd1c1 582 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
583 return None
584
442c37b7 585 def _match_entry(self, info_dict, incomplete):
8222d8de
JMF
586 """ Returns None iff the file should be downloaded """
587
6febd1c1 588 video_title = info_dict.get('title', info_dict.get('id', 'video'))
7012b23c
PH
589 if 'title' in info_dict:
590 # This can happen when we're just evaluating the playlist
591 title = info_dict['title']
592 matchtitle = self.params.get('matchtitle', False)
593 if matchtitle:
594 if not re.search(matchtitle, title, re.IGNORECASE):
6febd1c1 595 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
7012b23c
PH
596 rejecttitle = self.params.get('rejecttitle', False)
597 if rejecttitle:
598 if re.search(rejecttitle, title, re.IGNORECASE):
6febd1c1 599 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
8222d8de
JMF
600 date = info_dict.get('upload_date', None)
601 if date is not None:
602 dateRange = self.params.get('daterange', DateRange())
603 if date not in dateRange:
6febd1c1 604 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
5fe18bdb
PH
605 view_count = info_dict.get('view_count', None)
606 if view_count is not None:
607 min_views = self.params.get('min_views')
608 if min_views is not None and view_count < min_views:
6febd1c1 609 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
5fe18bdb
PH
610 max_views = self.params.get('max_views')
611 if max_views is not None and view_count > max_views:
6febd1c1 612 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
05900629 613 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
347de493 614 return 'Skipping "%s" because it is age restricted' % video_title
c1c9a79c 615 if self.in_download_archive(info_dict):
6febd1c1 616 return '%s has already been recorded in archive' % video_title
347de493 617
442c37b7
PH
618 if not incomplete:
619 match_filter = self.params.get('match_filter')
620 if match_filter is not None:
621 ret = match_filter(info_dict)
622 if ret is not None:
623 return ret
347de493 624
8222d8de 625 return None
fe7e0c98 626
b6c45014
JMF
627 @staticmethod
628 def add_extra_info(info_dict, extra_info):
629 '''Set the keys from extra_info in info dict if they are missing'''
630 for key, value in extra_info.items():
631 info_dict.setdefault(key, value)
632
7fc3fa05 633 def extract_info(self, url, download=True, ie_key=None, extra_info={},
61aa5ba3 634 process=True, force_generic_extractor=False):
8222d8de
JMF
635 '''
636 Returns a list with a dictionary for each video we find.
637 If 'download', also downloads the videos.
638 extra_info is a dict containing the extra values to add to each result
613b2d9d 639 '''
fe7e0c98 640
61aa5ba3 641 if not ie_key and force_generic_extractor:
d22dec74
S
642 ie_key = 'Generic'
643
8222d8de 644 if ie_key:
56c73665 645 ies = [self.get_info_extractor(ie_key)]
8222d8de
JMF
646 else:
647 ies = self._ies
648
649 for ie in ies:
650 if not ie.suitable(url):
651 continue
652
653 if not ie.working():
6febd1c1
PH
654 self.report_warning('The program functionality for this site has been marked as broken, '
655 'and will probably not work.')
8222d8de
JMF
656
657 try:
658 ie_result = ie.extract(url)
5f6a1245 659 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
8222d8de
JMF
660 break
661 if isinstance(ie_result, list):
662 # Backwards compatibility: old IE result format
8222d8de
JMF
663 ie_result = {
664 '_type': 'compat_list',
665 'entries': ie_result,
666 }
ea38e55f 667 self.add_default_extra_info(ie_result, ie, url)
7fc3fa05
PH
668 if process:
669 return self.process_ie_result(ie_result, download, extra_info)
670 else:
671 return ie_result
5f6a1245 672 except ExtractorError as de: # An error we somewhat expected
8222d8de
JMF
673 self.report_error(compat_str(de), de.format_traceback())
674 break
d3e5bbf4
PH
675 except MaxDownloadsReached:
676 raise
8222d8de
JMF
677 except Exception as e:
678 if self.params.get('ignoreerrors', False):
679 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
680 break
681 else:
682 raise
683 else:
1a489545 684 self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 685
ea38e55f
PH
686 def add_default_extra_info(self, ie_result, ie, url):
687 self.add_extra_info(ie_result, {
688 'extractor': ie.IE_NAME,
689 'webpage_url': url,
690 'webpage_url_basename': url_basename(url),
691 'extractor_key': ie.ie_key(),
692 })
693
8222d8de
JMF
694 def process_ie_result(self, ie_result, download=True, extra_info={}):
695 """
696 Take the result of the ie(may be modified) and resolve all unresolved
697 references (URLs, playlist items).
698
699 It will also download the videos if 'download'.
700 Returns the resolved ie_result.
701 """
702
e8ee972c
PH
703 result_type = ie_result.get('_type', 'video')
704
057a5206
PH
705 if result_type in ('url', 'url_transparent'):
706 extract_flat = self.params.get('extract_flat', False)
707 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
708 extract_flat is True):
057a5206
PH
709 if self.params.get('forcejson', False):
710 self.to_stdout(json.dumps(ie_result))
e8ee972c
PH
711 return ie_result
712
8222d8de 713 if result_type == 'video':
b6c45014 714 self.add_extra_info(ie_result, extra_info)
feee2ecf 715 return self.process_video_result(ie_result, download=download)
8222d8de
JMF
716 elif result_type == 'url':
717 # We have to add extra_info to the results because it may be
718 # contained in a playlist
719 return self.extract_info(ie_result['url'],
720 download,
721 ie_key=ie_result.get('ie_key'),
722 extra_info=extra_info)
7fc3fa05
PH
723 elif result_type == 'url_transparent':
724 # Use the information from the embedding page
725 info = self.extract_info(
726 ie_result['url'], ie_key=ie_result.get('ie_key'),
727 extra_info=extra_info, download=False, process=False)
728
412c617d
PH
729 force_properties = dict(
730 (k, v) for k, v in ie_result.items() if v is not None)
731 for f in ('_type', 'url'):
732 if f in force_properties:
733 del force_properties[f]
734 new_result = info.copy()
735 new_result.update(force_properties)
7fc3fa05
PH
736
737 assert new_result.get('_type') != 'url_transparent'
7fc3fa05
PH
738
739 return self.process_ie_result(
740 new_result, download=download, extra_info=extra_info)
42e12102 741 elif result_type == 'playlist' or result_type == 'multi_video':
8222d8de
JMF
742 # We process each entry in the playlist
743 playlist = ie_result.get('title', None) or ie_result.get('id', None)
6febd1c1 744 self.to_screen('[download] Downloading playlist: %s' % playlist)
8222d8de
JMF
745
746 playlist_results = []
747
8222d8de 748 playliststart = self.params.get('playliststart', 1) - 1
a19fd00c
PH
749 playlistend = self.params.get('playlistend', None)
750 # For backwards compatibility, interpret -1 as whole list
8222d8de 751 if playlistend == -1:
a19fd00c 752 playlistend = None
8222d8de 753
c14e88f0
PH
754 playlistitems_str = self.params.get('playlist_items', None)
755 playlistitems = None
756 if playlistitems_str is not None:
757 def iter_playlistitems(format):
758 for string_segment in format.split(','):
759 if '-' in string_segment:
760 start, end = string_segment.split('-')
761 for item in range(int(start), int(end) + 1):
762 yield int(item)
763 else:
764 yield int(string_segment)
765 playlistitems = iter_playlistitems(playlistitems_str)
766
b82f815f
PH
767 ie_entries = ie_result['entries']
768 if isinstance(ie_entries, list):
769 n_all_entries = len(ie_entries)
c14e88f0 770 if playlistitems:
3884dcf3
JMF
771 entries = [
772 ie_entries[i - 1] for i in playlistitems
773 if -n_all_entries <= i - 1 < n_all_entries]
c14e88f0
PH
774 else:
775 entries = ie_entries[playliststart:playlistend]
b7ab0590
PH
776 n_entries = len(entries)
777 self.to_screen(
778 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
779 (ie_result['extractor'], playlist, n_all_entries, n_entries))
b82f815f 780 elif isinstance(ie_entries, PagedList):
c14e88f0
PH
781 if playlistitems:
782 entries = []
783 for item in playlistitems:
784 entries.extend(ie_entries.getslice(
785 item - 1, item
786 ))
787 else:
788 entries = ie_entries.getslice(
789 playliststart, playlistend)
b7ab0590
PH
790 n_entries = len(entries)
791 self.to_screen(
792 "[%s] playlist %s: Downloading %d videos" %
793 (ie_result['extractor'], playlist, n_entries))
b82f815f 794 else: # iterable
c14e88f0
PH
795 if playlistitems:
796 entry_list = list(ie_entries)
797 entries = [entry_list[i - 1] for i in playlistitems]
798 else:
799 entries = list(itertools.islice(
800 ie_entries, playliststart, playlistend))
b82f815f
PH
801 n_entries = len(entries)
802 self.to_screen(
803 "[%s] playlist %s: Downloading %d videos" %
804 (ie_result['extractor'], playlist, n_entries))
8222d8de 805
ff815fe6
MS
806 if self.params.get('playlistreverse', False):
807 entries = entries[::-1]
808
fe7e0c98 809 for i, entry in enumerate(entries, 1):
734ea11e 810 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
8222d8de 811 extra = {
c6b4132a 812 'n_entries': n_entries,
fe7e0c98 813 'playlist': playlist,
a1cf99d0
PH
814 'playlist_id': ie_result.get('id'),
815 'playlist_title': ie_result.get('title'),
fe7e0c98 816 'playlist_index': i + playliststart,
b6c45014 817 'extractor': ie_result['extractor'],
9103bbc5 818 'webpage_url': ie_result['webpage_url'],
29eb5174 819 'webpage_url_basename': url_basename(ie_result['webpage_url']),
be97abc2 820 'extractor_key': ie_result['extractor_key'],
fe7e0c98 821 }
7012b23c 822
442c37b7 823 reason = self._match_entry(entry, incomplete=True)
7012b23c 824 if reason is not None:
6febd1c1 825 self.to_screen('[download] ' + reason)
7012b23c
PH
826 continue
827
8222d8de
JMF
828 entry_result = self.process_ie_result(entry,
829 download=download,
830 extra_info=extra)
831 playlist_results.append(entry_result)
832 ie_result['entries'] = playlist_results
833 return ie_result
834 elif result_type == 'compat_list':
c9bf4114
PH
835 self.report_warning(
836 'Extractor %s returned a compat_list result. '
837 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 838
8222d8de 839 def _fixup(r):
9e1a5b84
JW
840 self.add_extra_info(
841 r,
9103bbc5
JMF
842 {
843 'extractor': ie_result['extractor'],
844 'webpage_url': ie_result['webpage_url'],
29eb5174 845 'webpage_url_basename': url_basename(ie_result['webpage_url']),
be97abc2 846 'extractor_key': ie_result['extractor_key'],
9e1a5b84
JW
847 }
848 )
8222d8de
JMF
849 return r
850 ie_result['entries'] = [
b6c45014 851 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
852 for r in ie_result['entries']
853 ]
854 return ie_result
855 else:
856 raise Exception('Invalid result type: %s' % result_type)
857
67134eab
JMF
858 def _build_format_filter(self, filter_spec):
859 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
860
861 OPERATORS = {
862 '<': operator.lt,
863 '<=': operator.le,
864 '>': operator.gt,
865 '>=': operator.ge,
866 '=': operator.eq,
867 '!=': operator.ne,
868 }
67134eab 869 operator_rex = re.compile(r'''(?x)\s*
2ec19e95 870 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
083c9df9
PH
871 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
872 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
67134eab 873 $
083c9df9 874 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
67134eab 875 m = operator_rex.search(filter_spec)
9ddb6925
S
876 if m:
877 try:
878 comparison_value = int(m.group('value'))
879 except ValueError:
880 comparison_value = parse_filesize(m.group('value'))
881 if comparison_value is None:
882 comparison_value = parse_filesize(m.group('value') + 'B')
883 if comparison_value is None:
884 raise ValueError(
885 'Invalid value %r in format specification %r' % (
67134eab 886 m.group('value'), filter_spec))
9ddb6925
S
887 op = OPERATORS[m.group('op')]
888
083c9df9 889 if not m:
9ddb6925
S
890 STR_OPERATORS = {
891 '=': operator.eq,
892 '!=': operator.ne,
893 }
67134eab 894 str_operator_rex = re.compile(r'''(?x)
9ddb6925
S
895 \s*(?P<key>ext|acodec|vcodec|container|protocol)
896 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
897 \s*(?P<value>[a-zA-Z0-9_-]+)
67134eab 898 \s*$
9ddb6925 899 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
67134eab 900 m = str_operator_rex.search(filter_spec)
9ddb6925
S
901 if m:
902 comparison_value = m.group('value')
903 op = STR_OPERATORS[m.group('op')]
083c9df9 904
9ddb6925 905 if not m:
67134eab 906 raise ValueError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
907
908 def _filter(f):
909 actual_value = f.get(m.group('key'))
910 if actual_value is None:
911 return m.group('none_inclusive')
912 return op(actual_value, comparison_value)
67134eab
JMF
913 return _filter
914
915 def build_format_selector(self, format_spec):
916 def syntax_error(note, start):
917 message = (
918 'Invalid format specification: '
919 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
920 return SyntaxError(message)
921
922 PICKFIRST = 'PICKFIRST'
923 MERGE = 'MERGE'
924 SINGLE = 'SINGLE'
0130afb7 925 GROUP = 'GROUP'
67134eab
JMF
926 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
927
928 def _parse_filter(tokens):
929 filter_parts = []
930 for type, string, start, _, _ in tokens:
931 if type == tokenize.OP and string == ']':
932 return ''.join(filter_parts)
933 else:
934 filter_parts.append(string)
935
232541df
JMF
936 def _remove_unused_ops(tokens):
937 # Remove operators that we don't use and join them with the sourrounding strings
938 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
939 ALLOWED_OPS = ('/', '+', ',', '(', ')')
940 last_string, last_start, last_end, last_line = None, None, None, None
941 for type, string, start, end, line in tokens:
942 if type == tokenize.OP and string == '[':
943 if last_string:
944 yield tokenize.NAME, last_string, last_start, last_end, last_line
945 last_string = None
946 yield type, string, start, end, line
947 # everything inside brackets will be handled by _parse_filter
948 for type, string, start, end, line in tokens:
949 yield type, string, start, end, line
950 if type == tokenize.OP and string == ']':
951 break
952 elif type == tokenize.OP and string in ALLOWED_OPS:
953 if last_string:
954 yield tokenize.NAME, last_string, last_start, last_end, last_line
955 last_string = None
956 yield type, string, start, end, line
957 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
958 if not last_string:
959 last_string = string
960 last_start = start
961 last_end = end
962 else:
963 last_string += string
964 if last_string:
965 yield tokenize.NAME, last_string, last_start, last_end, last_line
966
cf2ac6df 967 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
968 selectors = []
969 current_selector = None
970 for type, string, start, _, _ in tokens:
971 # ENCODING is only defined in python 3.x
972 if type == getattr(tokenize, 'ENCODING', None):
973 continue
974 elif type in [tokenize.NAME, tokenize.NUMBER]:
975 current_selector = FormatSelector(SINGLE, string, [])
976 elif type == tokenize.OP:
cf2ac6df
JMF
977 if string == ')':
978 if not inside_group:
979 # ')' will be handled by the parentheses group
980 tokens.restore_last_token()
67134eab 981 break
cf2ac6df 982 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
983 tokens.restore_last_token()
984 break
cf2ac6df
JMF
985 elif inside_choice and string == ',':
986 tokens.restore_last_token()
987 break
988 elif string == ',':
0a31a350
JMF
989 if not current_selector:
990 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
991 selectors.append(current_selector)
992 current_selector = None
993 elif string == '/':
d96d604e
JMF
994 if not current_selector:
995 raise syntax_error('"/" must follow a format selector', start)
67134eab 996 first_choice = current_selector
cf2ac6df 997 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 998 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
999 elif string == '[':
1000 if not current_selector:
1001 current_selector = FormatSelector(SINGLE, 'best', [])
1002 format_filter = _parse_filter(tokens)
1003 current_selector.filters.append(format_filter)
0130afb7
JMF
1004 elif string == '(':
1005 if current_selector:
1006 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
1007 group = _parse_format_selection(tokens, inside_group=True)
1008 current_selector = FormatSelector(GROUP, group, [])
67134eab
JMF
1009 elif string == '+':
1010 video_selector = current_selector
cf2ac6df 1011 audio_selector = _parse_format_selection(tokens, inside_merge=True)
0a31a350
JMF
1012 if not video_selector or not audio_selector:
1013 raise syntax_error('"+" must be between two format selectors', start)
cf2ac6df 1014 current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
67134eab
JMF
1015 else:
1016 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1017 elif type == tokenize.ENDMARKER:
1018 break
1019 if current_selector:
1020 selectors.append(current_selector)
1021 return selectors
1022
1023 def _build_selector_function(selector):
1024 if isinstance(selector, list):
1025 fs = [_build_selector_function(s) for s in selector]
1026
1027 def selector_function(formats):
1028 for f in fs:
1029 for format in f(formats):
1030 yield format
1031 return selector_function
0130afb7
JMF
1032 elif selector.type == GROUP:
1033 selector_function = _build_selector_function(selector.selector)
67134eab
JMF
1034 elif selector.type == PICKFIRST:
1035 fs = [_build_selector_function(s) for s in selector.selector]
1036
1037 def selector_function(formats):
1038 for f in fs:
1039 picked_formats = list(f(formats))
1040 if picked_formats:
1041 return picked_formats
1042 return []
1043 elif selector.type == SINGLE:
1044 format_spec = selector.selector
1045
1046 def selector_function(formats):
bb8e5536
JMF
1047 formats = list(formats)
1048 if not formats:
1049 return
5acfa126
JMF
1050 if format_spec == 'all':
1051 for f in formats:
1052 yield f
1053 elif format_spec in ['best', 'worst', None]:
67134eab
JMF
1054 format_idx = 0 if format_spec == 'worst' else -1
1055 audiovideo_formats = [
1056 f for f in formats
1057 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1058 if audiovideo_formats:
1059 yield audiovideo_formats[format_idx]
1060 # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
1061 elif (all(f.get('acodec') != 'none' for f in formats) or
1062 all(f.get('vcodec') != 'none' for f in formats)):
1063 yield formats[format_idx]
1064 elif format_spec == 'bestaudio':
1065 audio_formats = [
1066 f for f in formats
1067 if f.get('vcodec') == 'none']
1068 if audio_formats:
1069 yield audio_formats[-1]
1070 elif format_spec == 'worstaudio':
1071 audio_formats = [
1072 f for f in formats
1073 if f.get('vcodec') == 'none']
1074 if audio_formats:
1075 yield audio_formats[0]
1076 elif format_spec == 'bestvideo':
1077 video_formats = [
1078 f for f in formats
1079 if f.get('acodec') == 'none']
1080 if video_formats:
1081 yield video_formats[-1]
1082 elif format_spec == 'worstvideo':
1083 video_formats = [
1084 f for f in formats
1085 if f.get('acodec') == 'none']
1086 if video_formats:
1087 yield video_formats[0]
1088 else:
1089 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1090 if format_spec in extensions:
1091 filter_f = lambda f: f['ext'] == format_spec
1092 else:
1093 filter_f = lambda f: f['format_id'] == format_spec
1094 matches = list(filter(filter_f, formats))
1095 if matches:
1096 yield matches[-1]
1097 elif selector.type == MERGE:
1098 def _merge(formats_info):
1099 format_1, format_2 = [f['format_id'] for f in formats_info]
1100 # The first format must contain the video and the
1101 # second the audio
1102 if formats_info[0].get('vcodec') == 'none':
1103 self.report_error('The first format must '
1104 'contain the video, try using '
1105 '"-f %s+%s"' % (format_2, format_1))
1106 return
1107 output_ext = (
1108 formats_info[0]['ext']
1109 if self.params.get('merge_output_format') is None
1110 else self.params['merge_output_format'])
1111 return {
1112 'requested_formats': formats_info,
1113 'format': '%s+%s' % (formats_info[0].get('format'),
1114 formats_info[1].get('format')),
1115 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1116 formats_info[1].get('format_id')),
1117 'width': formats_info[0].get('width'),
1118 'height': formats_info[0].get('height'),
1119 'resolution': formats_info[0].get('resolution'),
1120 'fps': formats_info[0].get('fps'),
1121 'vcodec': formats_info[0].get('vcodec'),
1122 'vbr': formats_info[0].get('vbr'),
1123 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1124 'acodec': formats_info[1].get('acodec'),
1125 'abr': formats_info[1].get('abr'),
1126 'ext': output_ext,
1127 }
1128 video_selector, audio_selector = map(_build_selector_function, selector.selector)
083c9df9 1129
67134eab
JMF
1130 def selector_function(formats):
1131 formats = list(formats)
1132 for pair in itertools.product(video_selector(formats), audio_selector(formats)):
1133 yield _merge(pair)
083c9df9 1134
67134eab 1135 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 1136
67134eab
JMF
1137 def final_selector(formats):
1138 for _filter in filters:
1139 formats = list(filter(_filter, formats))
1140 return selector_function(formats)
1141 return final_selector
083c9df9 1142
67134eab 1143 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 1144 try:
232541df 1145 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
1146 except tokenize.TokenError:
1147 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1148
1149 class TokenIterator(object):
1150 def __init__(self, tokens):
1151 self.tokens = tokens
1152 self.counter = 0
1153
1154 def __iter__(self):
1155 return self
1156
1157 def __next__(self):
1158 if self.counter >= len(self.tokens):
1159 raise StopIteration()
1160 value = self.tokens[self.counter]
1161 self.counter += 1
1162 return value
1163
1164 next = __next__
1165
1166 def restore_last_token(self):
1167 self.counter -= 1
1168
1169 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 1170 return _build_selector_function(parsed_selector)
a9c58ad9 1171
e5660ee6
JMF
1172 def _calc_headers(self, info_dict):
1173 res = std_headers.copy()
1174
1175 add_headers = info_dict.get('http_headers')
1176 if add_headers:
1177 res.update(add_headers)
1178
1179 cookies = self._calc_cookies(info_dict)
1180 if cookies:
1181 res['Cookie'] = cookies
1182
1183 return res
1184
1185 def _calc_cookies(self, info_dict):
662435f7 1186 pr = compat_urllib_request.Request(info_dict['url'])
e5660ee6 1187 self.cookiejar.add_cookie_header(pr)
662435f7 1188 return pr.get_header('Cookie')
e5660ee6 1189
dd82ffea
JMF
1190 def process_video_result(self, info_dict, download=True):
1191 assert info_dict.get('_type', 'video') == 'video'
1192
bec1fad2
PH
1193 if 'id' not in info_dict:
1194 raise ExtractorError('Missing "id" field in extractor result')
1195 if 'title' not in info_dict:
1196 raise ExtractorError('Missing "title" field in extractor result')
1197
dd82ffea
JMF
1198 if 'playlist' not in info_dict:
1199 # It isn't part of a playlist
1200 info_dict['playlist'] = None
1201 info_dict['playlist_index'] = None
1202
d5519808 1203 thumbnails = info_dict.get('thumbnails')
cfb56d1a
PH
1204 if thumbnails is None:
1205 thumbnail = info_dict.get('thumbnail')
1206 if thumbnail:
a7a14d95 1207 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
d5519808 1208 if thumbnails:
be6d7229 1209 thumbnails.sort(key=lambda t: (
cfb56d1a
PH
1210 t.get('preference'), t.get('width'), t.get('height'),
1211 t.get('id'), t.get('url')))
f6c24009 1212 for i, t in enumerate(thumbnails):
9603e8a7 1213 if t.get('width') and t.get('height'):
d5519808 1214 t['resolution'] = '%dx%d' % (t['width'], t['height'])
f6c24009
PH
1215 if t.get('id') is None:
1216 t['id'] = '%d' % i
d5519808
PH
1217
1218 if thumbnails and 'thumbnail' not in info_dict:
1219 info_dict['thumbnail'] = thumbnails[-1]['url']
1220
c9ae7b95 1221 if 'display_id' not in info_dict and 'id' in info_dict:
0afef30b
PH
1222 info_dict['display_id'] = info_dict['id']
1223
955c4514 1224 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
a55e36f4
S
1225 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1226 # see http://bugs.python.org/issue1646728)
1227 try:
1228 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1229 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1230 except (ValueError, OverflowError, OSError):
1231 pass
9d2ecdbc 1232
a504ced0 1233 if self.params.get('listsubtitles', False):
360e1ca5
JMF
1234 if 'automatic_captions' in info_dict:
1235 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1236 self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
a504ced0 1237 return
360e1ca5
JMF
1238 info_dict['requested_subtitles'] = self.process_subtitles(
1239 info_dict['id'], info_dict.get('subtitles'),
1240 info_dict.get('automatic_captions'))
a504ced0 1241
dd82ffea
JMF
1242 # We now pick which formats have to be downloaded
1243 if info_dict.get('formats') is None:
1244 # There's only one format available
1245 formats = [info_dict]
1246 else:
1247 formats = info_dict['formats']
1248
db95dc13
PH
1249 if not formats:
1250 raise ExtractorError('No video formats found!')
1251
181c7053
S
1252 formats_dict = {}
1253
dd82ffea 1254 # We check that all the formats have the format and format_id fields
db95dc13 1255 for i, format in enumerate(formats):
bec1fad2
PH
1256 if 'url' not in format:
1257 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1258
dd82ffea 1259 if format.get('format_id') is None:
8016c922 1260 format['format_id'] = compat_str(i)
181c7053
S
1261 format_id = format['format_id']
1262 if format_id not in formats_dict:
1263 formats_dict[format_id] = []
1264 formats_dict[format_id].append(format)
1265
1266 # Make sure all formats have unique format_id
1267 for format_id, ambiguous_formats in formats_dict.items():
1268 if len(ambiguous_formats) > 1:
1269 for i, format in enumerate(ambiguous_formats):
1270 format['format_id'] = '%s-%d' % (format_id, i)
1271
1272 for i, format in enumerate(formats):
8c51aa65 1273 if format.get('format') is None:
6febd1c1 1274 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
1275 id=format['format_id'],
1276 res=self.format_resolution(format),
6febd1c1 1277 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
8c51aa65 1278 )
c1002e96
PH
1279 # Automatically determine file extension if missing
1280 if 'ext' not in format:
cce929ea 1281 format['ext'] = determine_ext(format['url']).lower()
e5660ee6
JMF
1282 # Add HTTP headers, so that external programs can use them from the
1283 # json output
1284 full_format_info = info_dict.copy()
1285 full_format_info.update(format)
1286 format['http_headers'] = self._calc_headers(full_format_info)
dd82ffea 1287
4bcc7bd1 1288 # TODO Central sorting goes here
99e206d5 1289
f89197d7 1290 if formats[0] is not info_dict:
b3d9ef88
JMF
1291 # only set the 'formats' fields if the original info_dict list them
1292 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 1293 # element in the 'formats' field in info_dict is info_dict itself,
b3d9ef88
JMF
1294 # wich can't be exported to json
1295 info_dict['formats'] = formats
cfb56d1a 1296 if self.params.get('listformats'):
bfaae0a7 1297 self.list_formats(info_dict)
1298 return
cfb56d1a
PH
1299 if self.params.get('list_thumbnails'):
1300 self.list_thumbnails(info_dict)
1301 return
bfaae0a7 1302
de3ef3ed 1303 req_format = self.params.get('format')
a9c58ad9 1304 if req_format is None:
feccf29c 1305 req_format_list = []
3749e36e 1306 if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
8250c32f
JMF
1307 info_dict['extractor'] in ['youtube', 'ted'] and
1308 not info_dict.get('is_live')):
7fcb605b 1309 merger = FFmpegMergerPP(self)
97fcf1bb 1310 if merger.available and merger.can_merge():
7fcb605b 1311 req_format_list.append('bestvideo+bestaudio')
feccf29c
S
1312 req_format_list.append('best')
1313 req_format = '/'.join(req_format_list)
5acfa126
JMF
1314 format_selector = self.build_format_selector(req_format)
1315 formats_to_download = list(format_selector(formats))
dd82ffea 1316 if not formats_to_download:
6febd1c1 1317 raise ExtractorError('requested format not available',
78a3a9f8 1318 expected=True)
dd82ffea
JMF
1319
1320 if download:
1321 if len(formats_to_download) > 1:
6febd1c1 1322 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
dd82ffea
JMF
1323 for format in formats_to_download:
1324 new_info = dict(info_dict)
1325 new_info.update(format)
1326 self.process_info(new_info)
1327 # We update the info dict with the best quality format (backwards compatibility)
1328 info_dict.update(formats_to_download[-1])
1329 return info_dict
1330
98c70d6f 1331 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 1332 """Select the requested subtitles and their format"""
98c70d6f
JMF
1333 available_subs = {}
1334 if normal_subtitles and self.params.get('writesubtitles'):
1335 available_subs.update(normal_subtitles)
1336 if automatic_captions and self.params.get('writeautomaticsub'):
1337 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
1338 if lang not in available_subs:
1339 available_subs[lang] = cap_info
1340
4d171848
JMF
1341 if (not self.params.get('writesubtitles') and not
1342 self.params.get('writeautomaticsub') or not
1343 available_subs):
1344 return None
a504ced0
JMF
1345
1346 if self.params.get('allsubtitles', False):
1347 requested_langs = available_subs.keys()
1348 else:
1349 if self.params.get('subtitleslangs', False):
1350 requested_langs = self.params.get('subtitleslangs')
1351 elif 'en' in available_subs:
1352 requested_langs = ['en']
1353 else:
1354 requested_langs = [list(available_subs.keys())[0]]
1355
1356 formats_query = self.params.get('subtitlesformat', 'best')
1357 formats_preference = formats_query.split('/') if formats_query else []
1358 subs = {}
1359 for lang in requested_langs:
1360 formats = available_subs.get(lang)
1361 if formats is None:
1362 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1363 continue
a504ced0
JMF
1364 for ext in formats_preference:
1365 if ext == 'best':
1366 f = formats[-1]
1367 break
1368 matches = list(filter(lambda f: f['ext'] == ext, formats))
1369 if matches:
1370 f = matches[-1]
1371 break
1372 else:
1373 f = formats[-1]
1374 self.report_warning(
1375 'No subtitle format found matching "%s" for language %s, '
1376 'using %s' % (formats_query, lang, f['ext']))
1377 subs[lang] = f
1378 return subs
1379
8222d8de
JMF
1380 def process_info(self, info_dict):
1381 """Process a single resolved IE result."""
1382
1383 assert info_dict.get('_type', 'video') == 'video'
fd288278
PH
1384
1385 max_downloads = self.params.get('max_downloads')
1386 if max_downloads is not None:
1387 if self._num_downloads >= int(max_downloads):
1388 raise MaxDownloadsReached()
8222d8de
JMF
1389
1390 info_dict['fulltitle'] = info_dict['title']
1391 if len(info_dict['title']) > 200:
6febd1c1 1392 info_dict['title'] = info_dict['title'][:197] + '...'
8222d8de 1393
11b85ce6 1394 if 'format' not in info_dict:
8222d8de
JMF
1395 info_dict['format'] = info_dict['ext']
1396
442c37b7 1397 reason = self._match_entry(info_dict, incomplete=False)
8222d8de 1398 if reason is not None:
6febd1c1 1399 self.to_screen('[download] ' + reason)
8222d8de
JMF
1400 return
1401
fd288278 1402 self._num_downloads += 1
8222d8de 1403
e72c7e41 1404 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
8222d8de
JMF
1405
1406 # Forced printings
1407 if self.params.get('forcetitle', False):
0783b09b 1408 self.to_stdout(info_dict['fulltitle'])
8222d8de 1409 if self.params.get('forceid', False):
0783b09b 1410 self.to_stdout(info_dict['id'])
8222d8de 1411 if self.params.get('forceurl', False):
16ae61f6 1412 if info_dict.get('requested_formats') is not None:
1413 for f in info_dict['requested_formats']:
1414 self.to_stdout(f['url'] + f.get('play_path', ''))
1415 else:
1416 # For RTMP URLs, also include the playpath
1417 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
216d71d0 1418 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
0783b09b 1419 self.to_stdout(info_dict['thumbnail'])
216d71d0 1420 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
0783b09b 1421 self.to_stdout(info_dict['description'])
8222d8de 1422 if self.params.get('forcefilename', False) and filename is not None:
0783b09b 1423 self.to_stdout(filename)
525ef922
PH
1424 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1425 self.to_stdout(formatSeconds(info_dict['duration']))
8222d8de 1426 if self.params.get('forceformat', False):
0783b09b 1427 self.to_stdout(info_dict['format'])
9d153818 1428 if self.params.get('forcejson', False):
0783b09b 1429 self.to_stdout(json.dumps(info_dict))
8222d8de
JMF
1430
1431 # Do nothing else if in simulate mode
1432 if self.params.get('simulate', False):
1433 return
1434
1435 if filename is None:
1436 return
1437
1438 try:
e5a11a22 1439 dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
d26e981d 1440 if dn and not os.path.exists(dn):
8222d8de
JMF
1441 os.makedirs(dn)
1442 except (OSError, IOError) as err:
6febd1c1 1443 self.report_error('unable to create directory ' + compat_str(err))
8222d8de
JMF
1444 return
1445
1446 if self.params.get('writedescription', False):
2699da80 1447 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
7b6fefc9 1448 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
6febd1c1 1449 self.to_screen('[info] Video description is already present')
f00fd51d
JMF
1450 elif info_dict.get('description') is None:
1451 self.report_warning('There\'s no description to write.')
7b6fefc9
PH
1452 else:
1453 try:
6febd1c1 1454 self.to_screen('[info] Writing video description to: ' + descfn)
7b6fefc9
PH
1455 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1456 descfile.write(info_dict['description'])
7b6fefc9 1457 except (OSError, IOError):
6febd1c1 1458 self.report_error('Cannot write description file ' + descfn)
7b6fefc9 1459 return
8222d8de 1460
1fb07d10 1461 if self.params.get('writeannotations', False):
98727e12 1462 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
7b6fefc9 1463 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
6febd1c1 1464 self.to_screen('[info] Video annotations are already present')
7b6fefc9
PH
1465 else:
1466 try:
6febd1c1 1467 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
1468 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1469 annofile.write(info_dict['annotations'])
1470 except (KeyError, TypeError):
6febd1c1 1471 self.report_warning('There are no annotations to write.')
7b6fefc9 1472 except (OSError, IOError):
6febd1c1 1473 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 1474 return
1fb07d10 1475
c4a91be7 1476 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 1477 self.params.get('writeautomaticsub')])
c4a91be7 1478
c84dd8a9 1479 if subtitles_are_requested and info_dict.get('requested_subtitles'):
8222d8de
JMF
1480 # subtitles download errors are already managed as troubles in relevant IE
1481 # that way it will silently go on when used with unsupporting IE
c84dd8a9 1482 subtitles = info_dict['requested_subtitles']
0f2c0d33 1483 ie = self.get_info_extractor(info_dict['extractor_key'])
a504ced0
JMF
1484 for sub_lang, sub_info in subtitles.items():
1485 sub_format = sub_info['ext']
1486 if sub_info.get('data') is not None:
1487 sub_data = sub_info['data']
1488 else:
1489 try:
0f2c0d33
JMF
1490 sub_data = ie._download_webpage(
1491 sub_info['url'], info_dict['id'], note=False)
1492 except ExtractorError as err:
a504ced0 1493 self.report_warning('Unable to download subtitle for "%s": %s' %
0f2c0d33 1494 (sub_lang, compat_str(err.cause)))
a504ced0 1495 continue
8222d8de 1496 try:
d4051a8e 1497 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
7b6fefc9 1498 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
6febd1c1 1499 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
7b6fefc9 1500 else:
6febd1c1 1501 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
7b6fefc9 1502 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
a504ced0 1503 subfile.write(sub_data)
8222d8de 1504 except (OSError, IOError):
e4db1951 1505 self.report_error('Cannot write subtitles file ' + sub_filename)
8222d8de
JMF
1506 return
1507
8222d8de 1508 if self.params.get('writeinfojson', False):
b29e0000 1509 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
7b6fefc9 1510 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
6febd1c1 1511 self.to_screen('[info] Video description metadata is already present')
7b6fefc9 1512 else:
6febd1c1 1513 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
7b6fefc9 1514 try:
cb202fd2 1515 write_json_file(self.filter_requested_info(info_dict), infofn)
7b6fefc9 1516 except (OSError, IOError):
6febd1c1 1517 self.report_error('Cannot write metadata to JSON file ' + infofn)
7b6fefc9 1518 return
8222d8de 1519
ec82d85a 1520 self._write_thumbnails(info_dict, filename)
8222d8de
JMF
1521
1522 if not self.params.get('skip_download', False):
4340deca
P
1523 try:
1524 def dl(name, info):
a055469f 1525 fd = get_suitable_downloader(info, self.params)(self, self.params)
4340deca
P
1526 for ph in self._progress_hooks:
1527 fd.add_progress_hook(ph)
1528 if self.params.get('verbose'):
1529 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1530 return fd.download(name, info)
ee69b99a 1531
4340deca
P
1532 if info_dict.get('requested_formats') is not None:
1533 downloaded = []
1534 success = True
d47aeb22 1535 merger = FFmpegMergerPP(self)
f740fae2 1536 if not merger.available:
4340deca
P
1537 postprocessors = []
1538 self.report_warning('You have requested multiple '
1539 'formats but ffmpeg or avconv are not installed.'
4a5a898a 1540 ' The formats won\'t be merged.')
6350728b 1541 else:
4340deca 1542 postprocessors = [merger]
81cd954a
S
1543
1544 def compatible_formats(formats):
1545 video, audio = formats
1546 # Check extension
1547 video_ext, audio_ext = audio.get('ext'), video.get('ext')
1548 if video_ext and audio_ext:
1549 COMPATIBLE_EXTS = (
6728187a 1550 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
81cd954a
S
1551 ('webm')
1552 )
1553 for exts in COMPATIBLE_EXTS:
1554 if video_ext in exts and audio_ext in exts:
1555 return True
1556 # TODO: Check acodec/vcodec
1557 return False
1558
38c6902b
S
1559 filename_real_ext = os.path.splitext(filename)[1][1:]
1560 filename_wo_ext = (
1561 os.path.splitext(filename)[0]
1562 if filename_real_ext == info_dict['ext']
1563 else filename)
81cd954a 1564 requested_formats = info_dict['requested_formats']
c0dea0a7 1565 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
38c6902b 1566 info_dict['ext'] = 'mkv'
4a5a898a
S
1567 self.report_warning(
1568 'Requested formats are incompatible for merge and will be merged into mkv.')
38c6902b
S
1569 # Ensure filename always has a correct extension for successful merge
1570 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
5b5fbc08
JMF
1571 if os.path.exists(encodeFilename(filename)):
1572 self.to_screen(
1573 '[download] %s has already been downloaded and '
1574 'merged' % filename)
1575 else:
81cd954a 1576 for f in requested_formats:
5b5fbc08
JMF
1577 new_info = dict(info_dict)
1578 new_info.update(f)
1579 fname = self.prepare_filename(new_info)
666a9a2b 1580 fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
5b5fbc08
JMF
1581 downloaded.append(fname)
1582 partial_success = dl(fname, new_info)
1583 success = success and partial_success
1584 info_dict['__postprocessors'] = postprocessors
1585 info_dict['__files_to_merge'] = downloaded
4340deca
P
1586 else:
1587 # Just a single file
1588 success = dl(filename, info_dict)
1589 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1590 self.report_error('unable to download video data: %s' % str(err))
1591 return
1592 except (OSError, IOError) as err:
1593 raise UnavailableVideoError(err)
1594 except (ContentTooShortError, ) as err:
1595 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1596 return
8222d8de
JMF
1597
1598 if success:
6271f1ca 1599 # Fixup content
62cd676c
PH
1600 fixup_policy = self.params.get('fixup')
1601 if fixup_policy is None:
1602 fixup_policy = 'detect_or_warn'
1603
6271f1ca
PH
1604 stretched_ratio = info_dict.get('stretched_ratio')
1605 if stretched_ratio is not None and stretched_ratio != 1:
6271f1ca
PH
1606 if fixup_policy == 'warn':
1607 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1608 info_dict['id'], stretched_ratio))
1609 elif fixup_policy == 'detect_or_warn':
1610 stretched_pp = FFmpegFixupStretchedPP(self)
1611 if stretched_pp.available:
1612 info_dict.setdefault('__postprocessors', [])
1613 info_dict['__postprocessors'].append(stretched_pp)
1614 else:
1615 self.report_warning(
1616 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1617 info_dict['id'], stretched_ratio))
1618 else:
62cd676c
PH
1619 assert fixup_policy in ('ignore', 'never')
1620
1621 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1622 if fixup_policy == 'warn':
1623 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1624 info_dict['id']))
1625 elif fixup_policy == 'detect_or_warn':
1626 fixup_pp = FFmpegFixupM4aPP(self)
1627 if fixup_pp.available:
1628 info_dict.setdefault('__postprocessors', [])
1629 info_dict['__postprocessors'].append(fixup_pp)
1630 else:
1631 self.report_warning(
1632 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1633 info_dict['id']))
1634 else:
1635 assert fixup_policy in ('ignore', 'never')
6271f1ca 1636
8222d8de
JMF
1637 try:
1638 self.post_process(filename, info_dict)
1639 except (PostProcessingError) as err:
6febd1c1 1640 self.report_error('postprocessing: %s' % str(err))
8222d8de 1641 return
cd58dc3e 1642 self.record_download_archive(info_dict)
8222d8de
JMF
1643
1644 def download(self, url_list):
1645 """Download a given list of URLs."""
acd69589 1646 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
0c75c3fa 1647 if (len(url_list) > 1 and
8fb3ac36
PH
1648 '%' not in outtmpl and
1649 self.params.get('max_downloads') != 1):
acd69589 1650 raise SameFileError(outtmpl)
8222d8de
JMF
1651
1652 for url in url_list:
1653 try:
5f6a1245 1654 # It also downloads the videos
61aa5ba3
S
1655 res = self.extract_info(
1656 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de 1657 except UnavailableVideoError:
6febd1c1 1658 self.report_error('unable to download video')
8222d8de 1659 except MaxDownloadsReached:
6febd1c1 1660 self.to_screen('[info] Maximum number of downloaded files reached.')
8222d8de 1661 raise
63e0be34
PH
1662 else:
1663 if self.params.get('dump_single_json', False):
1664 self.to_stdout(json.dumps(res))
8222d8de
JMF
1665
1666 return self._download_retcode
1667
1dcc4c0c 1668 def download_with_info_file(self, info_filename):
31bd3925
JMF
1669 with contextlib.closing(fileinput.FileInput(
1670 [info_filename], mode='r',
1671 openhook=fileinput.hook_encoded('utf-8'))) as f:
1672 # FileInput doesn't have a read method, we can't call json.load
cb202fd2 1673 info = self.filter_requested_info(json.loads('\n'.join(f)))
d4943898
JMF
1674 try:
1675 self.process_ie_result(info, download=True)
1676 except DownloadError:
1677 webpage_url = info.get('webpage_url')
1678 if webpage_url is not None:
6febd1c1 1679 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
d4943898
JMF
1680 return self.download([webpage_url])
1681 else:
1682 raise
1683 return self._download_retcode
1dcc4c0c 1684
cb202fd2
S
1685 @staticmethod
1686 def filter_requested_info(info_dict):
1687 return dict(
1688 (k, v) for k, v in info_dict.items()
1689 if k not in ['requested_formats', 'requested_subtitles'])
1690
8222d8de
JMF
1691 def post_process(self, filename, ie_info):
1692 """Run all the postprocessors on the given file."""
1693 info = dict(ie_info)
1694 info['filepath'] = filename
6350728b
JMF
1695 pps_chain = []
1696 if ie_info.get('__postprocessors') is not None:
1697 pps_chain.extend(ie_info['__postprocessors'])
1698 pps_chain.extend(self._pps)
1699 for pp in pps_chain:
71646e46 1700 files_to_delete = []
8222d8de 1701 try:
592e97e8 1702 files_to_delete, info = pp.run(info)
8222d8de 1703 except PostProcessingError as e:
bbcbf4d4 1704 self.report_error(e.msg)
592e97e8
JMF
1705 if files_to_delete and not self.params.get('keepvideo', False):
1706 for old_filename in files_to_delete:
f3ff1a36 1707 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
592e97e8
JMF
1708 try:
1709 os.remove(encodeFilename(old_filename))
1710 except (IOError, OSError):
1711 self.report_warning('Unable to remove downloaded original file')
c1c9a79c 1712
5db07df6
PH
1713 def _make_archive_id(self, info_dict):
1714 # Future-proof against any change in case
1715 # and backwards compatibility with prior versions
d31209a1 1716 extractor = info_dict.get('extractor_key')
7012b23c
PH
1717 if extractor is None:
1718 if 'id' in info_dict:
1719 extractor = info_dict.get('ie_key') # key in a playlist
1720 if extractor is None:
5db07df6 1721 return None # Incomplete video information
6febd1c1 1722 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
1723
1724 def in_download_archive(self, info_dict):
1725 fn = self.params.get('download_archive')
1726 if fn is None:
1727 return False
1728
1729 vid_id = self._make_archive_id(info_dict)
1730 if vid_id is None:
7012b23c 1731 return False # Incomplete video information
5db07df6 1732
c1c9a79c
PH
1733 try:
1734 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1735 for line in archive_file:
1736 if line.strip() == vid_id:
1737 return True
1738 except IOError as ioe:
1739 if ioe.errno != errno.ENOENT:
1740 raise
1741 return False
1742
1743 def record_download_archive(self, info_dict):
1744 fn = self.params.get('download_archive')
1745 if fn is None:
1746 return
5db07df6
PH
1747 vid_id = self._make_archive_id(info_dict)
1748 assert vid_id
c1c9a79c 1749 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 1750 archive_file.write(vid_id + '\n')
dd82ffea 1751
8c51aa65 1752 @staticmethod
8abeeb94 1753 def format_resolution(format, default='unknown'):
fb04e403
PH
1754 if format.get('vcodec') == 'none':
1755 return 'audio only'
f49d89ee
PH
1756 if format.get('resolution') is not None:
1757 return format['resolution']
8c51aa65
JMF
1758 if format.get('height') is not None:
1759 if format.get('width') is not None:
6febd1c1 1760 res = '%sx%s' % (format['width'], format['height'])
8c51aa65 1761 else:
6febd1c1 1762 res = '%sp' % format['height']
f49d89ee 1763 elif format.get('width') is not None:
6febd1c1 1764 res = '?x%d' % format['width']
8c51aa65 1765 else:
8abeeb94 1766 res = default
8c51aa65
JMF
1767 return res
1768
c57f7757
PH
1769 def _format_note(self, fdict):
1770 res = ''
1771 if fdict.get('ext') in ['f4f', 'f4m']:
1772 res += '(unsupported) '
1773 if fdict.get('format_note') is not None:
1774 res += fdict['format_note'] + ' '
1775 if fdict.get('tbr') is not None:
1776 res += '%4dk ' % fdict['tbr']
1777 if fdict.get('container') is not None:
1778 if res:
1779 res += ', '
1780 res += '%s container' % fdict['container']
1781 if (fdict.get('vcodec') is not None and
1782 fdict.get('vcodec') != 'none'):
1783 if res:
1784 res += ', '
1785 res += fdict['vcodec']
91c7271a 1786 if fdict.get('vbr') is not None:
c57f7757
PH
1787 res += '@'
1788 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1789 res += 'video@'
1790 if fdict.get('vbr') is not None:
1791 res += '%4dk' % fdict['vbr']
fbb21cf5
PH
1792 if fdict.get('fps') is not None:
1793 res += ', %sfps' % fdict['fps']
c57f7757
PH
1794 if fdict.get('acodec') is not None:
1795 if res:
1796 res += ', '
1797 if fdict['acodec'] == 'none':
1798 res += 'video only'
1799 else:
1800 res += '%-5s' % fdict['acodec']
1801 elif fdict.get('abr') is not None:
1802 if res:
1803 res += ', '
1804 res += 'audio'
1805 if fdict.get('abr') is not None:
1806 res += '@%3dk' % fdict['abr']
1807 if fdict.get('asr') is not None:
1808 res += ' (%5dHz)' % fdict['asr']
1809 if fdict.get('filesize') is not None:
1810 if res:
1811 res += ', '
1812 res += format_bytes(fdict['filesize'])
9732d77e
PH
1813 elif fdict.get('filesize_approx') is not None:
1814 if res:
1815 res += ', '
1816 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 1817 return res
91c7271a 1818
c57f7757 1819 def list_formats(self, info_dict):
94badb25 1820 formats = info_dict.get('formats', [info_dict])
b81a359e
PH
1821 table = [
1822 [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1823 for f in formats
e65566a9 1824 if f.get('preference') is None or f['preference'] >= -1000]
94badb25 1825 if len(formats) > 1:
b81a359e 1826 table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
57dd9a8f 1827
b81a359e 1828 header_line = ['format code', 'extension', 'resolution', 'note']
cfb56d1a 1829 self.to_screen(
b81a359e
PH
1830 '[info] Available formats for %s:\n%s' %
1831 (info_dict['id'], render_table(header_line, table)))
cfb56d1a
PH
1832
1833 def list_thumbnails(self, info_dict):
1834 thumbnails = info_dict.get('thumbnails')
1835 if not thumbnails:
1836 tn_url = info_dict.get('thumbnail')
1837 if tn_url:
1838 thumbnails = [{'id': '0', 'url': tn_url}]
1839 else:
1840 self.to_screen(
1841 '[info] No thumbnails present for %s' % info_dict['id'])
1842 return
1843
1844 self.to_screen(
1845 '[info] Thumbnails for %s:' % info_dict['id'])
1846 self.to_screen(render_table(
1847 ['ID', 'width', 'height', 'URL'],
1848 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 1849
360e1ca5 1850 def list_subtitles(self, video_id, subtitles, name='subtitles'):
a504ced0 1851 if not subtitles:
360e1ca5 1852 self.to_screen('%s has no %s' % (video_id, name))
a504ced0 1853 return
a504ced0 1854 self.to_screen(
edab9dbf
JMF
1855 'Available %s for %s:' % (name, video_id))
1856 self.to_screen(render_table(
1857 ['Language', 'formats'],
1858 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1859 for lang, formats in subtitles.items()]))
a504ced0 1860
dca08720
PH
1861 def urlopen(self, req):
1862 """ Start an HTTP download """
37419b4f 1863
d05cfe06
S
1864 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1865 # always respected by websites, some tend to give out URLs with non percent-encoded
1866 # non-ASCII characters (see telemb.py, ard.py [#3412])
37419b4f 1867 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
d05cfe06
S
1868 # To work around aforementioned issue we will replace request's original URL with
1869 # percent-encoded one
8f9312c3 1870 req_is_string = isinstance(req, compat_basestring)
68b09730 1871 url = req if req_is_string else req.get_full_url()
d05cfe06 1872 url_escaped = escape_url(url)
37419b4f
S
1873
1874 # Substitute URL if any change after escaping
1875 if url != url_escaped:
68b09730 1876 if req_is_string:
37419b4f
S
1877 req = url_escaped
1878 else:
931bc3c3
S
1879 req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
1880 req = req_type(
37419b4f
S
1881 url_escaped, data=req.data, headers=req.headers,
1882 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1883
19a41fc6 1884 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
1885
1886 def print_debug_header(self):
1887 if not self.params.get('verbose'):
1888 return
62fec3b2 1889
4192b51c
PH
1890 if type('') is not compat_str:
1891 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1892 self.report_warning(
1893 'Your Python is broken! Update to a newer and supported version')
1894
c6afed48
PH
1895 stdout_encoding = getattr(
1896 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
b0472057 1897 encoding_str = (
734f90bb
PH
1898 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1899 locale.getpreferredencoding(),
1900 sys.getfilesystemencoding(),
c6afed48 1901 stdout_encoding,
b0472057 1902 self.get_encoding()))
4192b51c 1903 write_string(encoding_str, encoding=None)
734f90bb
PH
1904
1905 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
dca08720
PH
1906 try:
1907 sp = subprocess.Popen(
1908 ['git', 'rev-parse', '--short', 'HEAD'],
1909 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1910 cwd=os.path.dirname(os.path.abspath(__file__)))
1911 out, err = sp.communicate()
1912 out = out.decode().strip()
1913 if re.match('[0-9a-f]+', out):
734f90bb 1914 self._write_string('[debug] Git HEAD: ' + out + '\n')
70a1165b 1915 except Exception:
dca08720
PH
1916 try:
1917 sys.exc_clear()
70a1165b 1918 except Exception:
dca08720 1919 pass
d28b5171
PH
1920 self._write_string('[debug] Python version %s - %s\n' % (
1921 platform.python_version(), platform_name()))
1922
73fac4e9 1923 exe_versions = FFmpegPostProcessor.get_versions(self)
4c83c967 1924 exe_versions['rtmpdump'] = rtmpdump_version()
d28b5171
PH
1925 exe_str = ', '.join(
1926 '%s %s' % (exe, v)
1927 for exe, v in sorted(exe_versions.items())
1928 if v
1929 )
1930 if not exe_str:
1931 exe_str = 'none'
1932 self._write_string('[debug] exe versions: %s\n' % exe_str)
dca08720
PH
1933
1934 proxy_map = {}
1935 for handler in self._opener.handlers:
1936 if hasattr(handler, 'proxies'):
1937 proxy_map.update(handler.proxies)
734f90bb 1938 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 1939
58b1f00d
PH
1940 if self.params.get('call_home', False):
1941 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1942 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1943 latest_version = self.urlopen(
1944 'https://yt-dl.org/latest/version').read().decode('utf-8')
1945 if version_tuple(latest_version) > version_tuple(__version__):
1946 self.report_warning(
1947 'You are using an outdated version (newest version: %s)! '
1948 'See https://yt-dl.org/update if you need help updating.' %
1949 latest_version)
1950
e344693b 1951 def _setup_opener(self):
6ad14cab 1952 timeout_val = self.params.get('socket_timeout')
19a41fc6 1953 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
6ad14cab 1954
dca08720
PH
1955 opts_cookiefile = self.params.get('cookiefile')
1956 opts_proxy = self.params.get('proxy')
1957
1958 if opts_cookiefile is None:
1959 self.cookiejar = compat_cookiejar.CookieJar()
1960 else:
1961 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1962 opts_cookiefile)
1963 if os.access(opts_cookiefile, os.R_OK):
1964 self.cookiejar.load()
1965
1966 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1967 self.cookiejar)
1968 if opts_proxy is not None:
1969 if opts_proxy == '':
1970 proxies = {}
1971 else:
1972 proxies = {'http': opts_proxy, 'https': opts_proxy}
1973 else:
1974 proxies = compat_urllib_request.getproxies()
1975 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1976 if 'http' in proxies and 'https' not in proxies:
1977 proxies['https'] = proxies['http']
91410c9b 1978 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
1979
1980 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
1981 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1982 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
dca08720 1983 opener = compat_urllib_request.build_opener(
2461f79d
PH
1984 proxy_handler, https_handler, cookie_processor, ydlh)
1985
dca08720
PH
1986 # Delete the default user-agent header, which would otherwise apply in
1987 # cases where our custom HTTP handler doesn't come into play
1988 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1989 opener.addheaders = []
1990 self._opener = opener
62fec3b2
PH
1991
1992 def encode(self, s):
1993 if isinstance(s, bytes):
1994 return s # Already encoded
1995
1996 try:
1997 return s.encode(self.get_encoding())
1998 except UnicodeEncodeError as err:
1999 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2000 raise
2001
2002 def get_encoding(self):
2003 encoding = self.params.get('encoding')
2004 if encoding is None:
2005 encoding = preferredencoding()
2006 return encoding
ec82d85a
PH
2007
2008 def _write_thumbnails(self, info_dict, filename):
2009 if self.params.get('writethumbnail', False):
2010 thumbnails = info_dict.get('thumbnails')
2011 if thumbnails:
2012 thumbnails = [thumbnails[-1]]
2013 elif self.params.get('write_all_thumbnails', False):
2014 thumbnails = info_dict.get('thumbnails')
2015 else:
2016 return
2017
2018 if not thumbnails:
2019 # No thumbnails present, so return immediately
2020 return
2021
2022 for t in thumbnails:
2023 thumb_ext = determine_ext(t['url'], 'jpg')
2024 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2025 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
82245a6d 2026 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
ec82d85a
PH
2027
2028 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2029 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2030 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2031 else:
2032 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2033 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2034 try:
2035 uf = self.urlopen(t['url'])
2036 with open(thumb_filename, 'wb') as thumbf:
2037 shutil.copyfileobj(uf, thumbf)
2038 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2039 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2040 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2041 self.report_warning('Unable to download thumbnail "%s": %s' %
2042 (t['url'], compat_str(err)))