]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
[kuwo] treat the offline error as an expected ExtractorError
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
9d2ecdbc 8import datetime
c1c9a79c 9import errno
31bd3925 10import fileinput
8222d8de 11import io
b82f815f 12import itertools
8694c600 13import json
62fec3b2 14import locale
083c9df9 15import operator
8222d8de 16import os
dca08720 17import platform
8222d8de
JMF
18import re
19import shutil
dca08720 20import subprocess
8222d8de
JMF
21import socket
22import sys
23import time
67134eab 24import tokenize
8222d8de
JMF
25import traceback
26
1e5b9a95
PH
27if os.name == 'nt':
28 import ctypes
29
8c25f81b 30from .compat import (
dca08720 31 compat_cookiejar,
4644ac55 32 compat_expanduser,
003c69a8 33 compat_get_terminal_size,
ce02ed60 34 compat_http_client,
4f026faf 35 compat_kwargs,
ce02ed60 36 compat_str,
67134eab 37 compat_tokenize_tokenize,
ce02ed60
PH
38 compat_urllib_error,
39 compat_urllib_request,
8c25f81b
PH
40)
41from .utils import (
ce02ed60
PH
42 ContentTooShortError,
43 date_from_str,
44 DateRange,
acd69589 45 DEFAULT_OUTTMPL,
ce02ed60
PH
46 determine_ext,
47 DownloadError,
48 encodeFilename,
49 ExtractorError,
02dbf93f 50 format_bytes,
525ef922 51 formatSeconds,
ce02ed60 52 locked_file,
dca08720 53 make_HTTPS_handler,
ce02ed60 54 MaxDownloadsReached,
b7ab0590 55 PagedList,
083c9df9 56 parse_filesize,
91410c9b 57 PerRequestProxyHandler,
ce02ed60 58 PostProcessingError,
dca08720 59 platform_name,
ce02ed60 60 preferredencoding,
cfb56d1a 61 render_table,
ce02ed60
PH
62 SameFileError,
63 sanitize_filename,
1bb5c511 64 sanitize_path,
e5660ee6 65 std_headers,
ce02ed60 66 subtitles_filename,
ce02ed60 67 UnavailableVideoError,
29eb5174 68 url_basename,
58b1f00d 69 version_tuple,
ce02ed60
PH
70 write_json_file,
71 write_string,
6a3f4c3f 72 YoutubeDLCookieProcessor,
dca08720 73 YoutubeDLHandler,
6350728b 74 prepend_extension,
b29e0000 75 replace_extension,
7d4111ed 76 args_to_str,
05900629 77 age_restricted,
ce02ed60 78)
a0e07d31 79from .cache import Cache
023fa8c4 80from .extractor import get_info_extractor, gen_extractors
3bc2ddcc 81from .downloader import get_suitable_downloader
4c83c967 82from .downloader.rtmp import rtmpdump_version
4f026faf 83from .postprocessor import (
62cd676c 84 FFmpegFixupM4aPP,
6271f1ca 85 FFmpegFixupStretchedPP,
4f026faf
PH
86 FFmpegMergerPP,
87 FFmpegPostProcessor,
88 get_postprocessor,
89)
dca08720 90from .version import __version__
8222d8de
JMF
91
92
93class YoutubeDL(object):
94 """YoutubeDL class.
95
96 YoutubeDL objects are the ones responsible for downloading the
97 actual video file and writing it to disk if the user has requested
98 it, among some other tasks. In most cases there should be one per
99 program. As, given a video URL, the downloader doesn't know how to
100 extract all the needed information, task that InfoExtractors do, it
101 has to pass the URL to one of them.
102
103 For this, YoutubeDL objects have a method that allows
104 InfoExtractors to be registered in a given order. When it is passed
105 a URL, the YoutubeDL object hands it to the first InfoExtractor it
106 finds that reports being able to handle it. The InfoExtractor extracts
107 all the information about the video or videos the URL refers to, and
108 YoutubeDL processes the extracted information, possibly using a File
109 Downloader to download the video.
110
111 YoutubeDL objects accept a lot of parameters. In order not to saturate
112 the object constructor with arguments, it receives a dictionary of
113 options instead. These options are available through the params
114 attribute for the InfoExtractors to use. The YoutubeDL also
115 registers itself as the downloader in charge for the InfoExtractors
116 that are added to it, so this is a "mutual registration".
117
118 Available options:
119
120 username: Username for authentication purposes.
121 password: Password for authentication purposes.
180940e0 122 videopassword: Password for accessing a video.
8222d8de
JMF
123 usenetrc: Use netrc for authentication instead.
124 verbose: Print additional info to stdout.
125 quiet: Do not print messages to stdout.
ad8915b7 126 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
127 forceurl: Force printing final URL.
128 forcetitle: Force printing title.
129 forceid: Force printing ID.
130 forcethumbnail: Force printing thumbnail URL.
131 forcedescription: Force printing description.
132 forcefilename: Force printing final filename.
525ef922 133 forceduration: Force printing duration.
8694c600 134 forcejson: Force printing info_dict as JSON.
63e0be34
PH
135 dump_single_json: Force printing the info_dict of the whole playlist
136 (or video) as a single JSON line.
8222d8de 137 simulate: Do not download the video files.
d8600787 138 format: Video format code. See options.py for more information.
8222d8de
JMF
139 outtmpl: Template for output names.
140 restrictfilenames: Do not allow "&" and spaces in file names
141 ignoreerrors: Do not stop on download errors.
d22dec74 142 force_generic_extractor: Force downloader to use the generic extractor
8222d8de
JMF
143 nooverwrites: Prevent overwriting files.
144 playliststart: Playlist item to start at.
145 playlistend: Playlist item to end at.
c14e88f0 146 playlist_items: Specific indices of playlist to download.
ff815fe6 147 playlistreverse: Download playlist items in reverse order.
8222d8de
JMF
148 matchtitle: Download only matching titles.
149 rejecttitle: Reject downloads for matching titles.
8bf9319e 150 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
151 logtostderr: Log messages to stderr instead of stdout.
152 writedescription: Write the video description to a .description file
153 writeinfojson: Write the video description to a .info.json file
1fb07d10 154 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 155 writethumbnail: Write the thumbnail image to a file
ec82d85a 156 write_all_thumbnails: Write all thumbnail formats to files
8222d8de 157 writesubtitles: Write the video subtitles to a file
b004821f 158 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 159 allsubtitles: Downloads all the subtitles of the video
0b7f3118 160 (requires writesubtitles or writeautomaticsub)
8222d8de 161 listsubtitles: Lists all available subtitles for the video
a504ced0 162 subtitlesformat: The format code for subtitles
aa6a10c4 163 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
164 keepvideo: Keep the video file after post-processing
165 daterange: A DateRange object, download only if the upload_date is in the range.
166 skip_download: Skip the actual download of the video file
c35f9e72 167 cachedir: Location of the cache files in the filesystem.
a0e07d31 168 False to disable filesystem cache.
47192f92 169 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
170 age_limit: An integer representing the user's age in years.
171 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
172 min_views: An integer representing the minimum view count the video
173 must have in order to not be skipped.
174 Videos without view count information are always
175 downloaded. None for no limit.
176 max_views: An integer representing the maximum view count.
177 Videos that are more popular than that are not
178 downloaded.
179 Videos without view count information are always
180 downloaded. None for no limit.
181 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
182 Videos already present in the file are not downloaded
183 again.
dca08720 184 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 185 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
186 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
187 At the moment, this is only supported by YouTube.
a1ee09e8 188 proxy: URL of the proxy server to use
91410c9b
PH
189 cn_verification_proxy: URL of the proxy to use for IP address verification
190 on Chinese sites. (Experimental)
e344693b 191 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
192 bidi_workaround: Work around buggy terminals without bidirectional text
193 support, using fribidi
a0ddb8a2 194 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 195 include_ads: Download ads as well
04b4d394
PH
196 default_search: Prepend this string if an input url is not valid.
197 'auto' for elaborate guessing
62fec3b2 198 encoding: Use this encoding instead of the system-specified.
e8ee972c 199 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
200 Pass in 'in_playlist' to only show this behavior for
201 playlist items.
4f026faf 202 postprocessors: A list of dictionaries, each with an entry
71b640cc
PH
203 * key: The name of the postprocessor. See
204 youtube_dl/postprocessor/__init__.py for a list.
4f026faf
PH
205 as well as any further keyword arguments for the
206 postprocessor.
71b640cc
PH
207 progress_hooks: A list of functions that get called on download
208 progress, with a dictionary with the entries
5cda4eda 209 * status: One of "downloading", "error", or "finished".
ee69b99a 210 Check this first and ignore unknown values.
71b640cc 211
5cda4eda 212 If status is one of "downloading", or "finished", the
ee69b99a
PH
213 following properties may also be present:
214 * filename: The final filename (always present)
5cda4eda 215 * tmpfilename: The filename we're currently writing to
71b640cc
PH
216 * downloaded_bytes: Bytes on disk
217 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
218 * total_bytes_estimate: Guess of the eventual file size,
219 None if unavailable.
220 * elapsed: The number of seconds since download started.
71b640cc
PH
221 * eta: The estimated time in seconds, None if unknown
222 * speed: The download speed in bytes/second, None if
223 unknown
5cda4eda
PH
224 * fragment_index: The counter of the currently
225 downloaded video fragment.
226 * fragment_count: The number of fragments (= individual
227 files that will be merged)
71b640cc
PH
228
229 Progress hooks are guaranteed to be called at least once
230 (with status "finished") if the download is successful.
45598f15 231 merge_output_format: Extension to use when merging formats.
6271f1ca
PH
232 fixup: Automatically correct known faults of the file.
233 One of:
234 - "never": do nothing
235 - "warn": only emit a warning
236 - "detect_or_warn": check whether we can do anything
62cd676c 237 about it, warn otherwise (default)
be4a824d 238 source_address: (Experimental) Client-side IP address to bind to.
8bfa7545
PH
239 call_home: Boolean, true iff we are allowed to contact the
240 youtube-dl servers for debugging.
5f0d813d 241 sleep_interval: Number of seconds to sleep before each download.
cfb56d1a
PH
242 listformats: Print an overview of available video formats and exit.
243 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
244 match_filter: A function that gets called with the info_dict of
245 every video.
246 If it returns a message, the video is ignored.
247 If it returns None, the video is downloaded.
248 match_filter_func in utils.py is one example for this.
7e5db8c9 249 no_color: Do not emit color codes in output.
71b640cc 250
85729c51
PH
251 The following options determine which downloader is picked:
252 external_downloader: Executable of the external downloader to call.
253 None or unset for standard (built-in) downloader.
254 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
fe7e0c98 255
8222d8de 256 The following parameters are not used by YoutubeDL itself, they are used by
c75f0b36 257 the downloader (see youtube_dl/downloader/common.py):
8222d8de 258 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 259 noresizebuffer, retries, continuedl, noprogress, consoletitle,
c75f0b36 260 xattr_set_filesize, external_downloader_args.
76b1bd67
JMF
261
262 The following options are used by the post processors:
263 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
264 otherwise prefer avconv.
f72b0a60
S
265 postprocessor_args: A list of additional command-line arguments for the
266 postprocessor.
8222d8de
JMF
267 """
268
269 params = None
270 _ies = []
271 _pps = []
272 _download_retcode = None
273 _num_downloads = None
274 _screen_file = None
275
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring;
    None is treated as an empty dictionary.
    If auto_init is true, the debug header is printed and the default
    InfoExtractors are registered.
    Post processors listed under 'postprocessors' and hooks listed under
    'progress_hooks' are registered as well.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Prefer bidiv, fall back to fribidi if it cannot be started
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # ENOENT: the helper executable does not exist
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
            not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if isinstance(params.get('outtmpl'), bytes):
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        # Everything except 'key' is passed to the postprocessor constructor
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)
352
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn about arguments that look like a YouTube ID starting with a dash.

    The warning shows the command line rewritten so that those arguments
    follow a '--' separator.
    """
    # short YouTube ID starting with dash?
    short_id_re = r'^-[0-9A-Za-z_-]{10}$'
    suspicious = []
    regular = []
    for arg in argv:
        if re.match(short_id_re, arg):
            suspicious.append(arg)
        else:
            regular.append(arg)
    if not suspicious:
        return
    correct_argv = ['youtube-dl'] + regular + ['--'] + suspicious
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
368
8222d8de
JMF
def add_info_extractor(self, ie):
    """Register ie as the last InfoExtractor and attach this downloader to it."""
    self._ies.append(ie)
    # Also index the instance by its key so get_info_extractor can reuse it
    instances = self._ies_instances
    instances[ie.ie_key()] = ie
    ie.set_downloader(self)
374
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.  If there is none yet,
    a new one is created, added to the extractor list and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
386
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every InfoExtractor returned by gen_extractors, in order
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
393
8222d8de
JMF
def add_post_processor(self, pp):
    """Append pp to the post processor chain and attach this downloader to it."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
398
933605d7
JMF
def add_progress_hook(self, ph):
    """Register ph as a progress hook (currently only for the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 402
1c088fa8 403 def _bidi_workaround(self, message):
5d681e96 404 if not hasattr(self, '_output_channel'):
1c088fa8
PH
405 return message
406
5d681e96 407 assert hasattr(self, '_output_process')
11b85ce6 408 assert isinstance(message, compat_str)
6febd1c1
PH
409 line_count = message.count('\n') + 1
410 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 411 self._output_process.stdin.flush()
6febd1c1 412 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 413 for _ in range(line_count))
6febd1c1 414 return res[:-len('\n')]
1c088fa8 415
def to_screen(self, message, skip_eol=False):
    """Print message via to_stdout, honouring the 'quiet' option."""
    outcome = self.to_stdout(message, skip_eol, check_quiet=True)
    return outcome
419
def _write_string(self, s, out=None):
    """Write s to out with write_string, using the 'encoding' option."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
734f90bb 422
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file.

    If a 'logger' is configured, the message is logged at debug level
    instead.  With check_quiet, nothing is printed in quiet mode.
    skip_eol suppresses the trailing newline.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return
    rendered = self._bidi_workaround(message)
    line_end = ['\n', ''][skip_eol]
    self._write_string(rendered + line_end, self._screen_file)
8222d8de
JMF
433
def to_stderr(self, message):
    """Print message to the error file, or log it as an error if a 'logger' is set."""
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        self.params['logger'].error(message)
        return
    rendered = self._bidi_workaround(message)
    self._write_string(rendered + '\n', self._err_file)
8222d8de 443
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message when 'consoletitle' is enabled."""
    enabled = self.params.get('consoletitle', False)
    if not enabled:
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        title = ctypes.c_wchar_p(message)
        ctypes.windll.kernel32.SetConsoleTitleW(title)
    elif 'TERM' in os.environ:
        escape_sequence = '\033]0;%s\007' % message
        self._write_string(escape_sequence, self._screen_file)
1e5b9a95 453
bdde425c
PH
def save_console_title(self):
    """Push the current console title on the terminal's title stack."""
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
460
def restore_console_title(self):
    """Pop the console title from the terminal's title stack."""
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
467
468 def __enter__(self):
469 self.save_console_title()
470 return self
471
472 def __exit__(self, *args):
473 self.restore_console_title()
f89197d7 474
dca08720
PH
475 if self.params.get('cookiefile') is not None:
476 self.cookiejar.save()
bdde425c 477
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message, if any, is printed to stderr; in verbose mode a
    traceback is printed too.  Unless 'ignoreerrors' is set, a
    DownloadError is raised afterwards; otherwise the download return
    code is set to 1.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        if tb is None:
            exc_type, exc_value = sys.exc_info()[:2]
            if exc_type:  # if .trouble has been called from an except block
                pieces = []
                if hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
                    pieces.append(''.join(traceback.format_exception(*exc_value.exc_info)))
                pieces.append(compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)
    if not self.params.get('ignoreerrors', False):
        exc_type, exc_value = sys.exc_info()[:2]
        # Prefer the exception info wrapped in the current exception, if any
        if exc_type and hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
            exc_info = exc_value.exc_info
        else:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
507
def report_warning(self, message):
    '''
    Print the message to stderr, prefixed with 'WARNING:'.
    The prefix is colored if stderr is a tty file.
    If a 'logger' is set, the message is logged as a warning instead;
    with 'no_warnings' nothing is printed.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return
    colored = not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt'
    if colored:
        _msg_header = '\033[0;33mWARNING:\033[0m'
    else:
        _msg_header = 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))
8222d8de
JMF
524
def report_error(self, message, tb=None):
    '''
    Call trouble with the message prefixed with 'ERROR:'; the prefix is
    colored in red if stderr is a tty file.
    '''
    colored = not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt'
    _msg_header = '\033[0;31mERROR:\033[0m' if colored else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)
536
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Report that file_name has already been fully downloaded."""
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        # Fall back to a message that does not contain the file name
        self.to_screen('[download] The file has already been downloaded')
8222d8de 543
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Generate the output filename.

    The 'outtmpl' option (DEFAULT_OUTTMPL if unset) is expanded with the
    fields of info_dict plus the computed 'epoch', 'autonumber' and, if
    width and/or height are known, 'resolution' fields.  Fields that are
    missing or None are rendered as 'NA'.

    Returns the filename, or None after reporting an error if expanding
    the template raises a ValueError.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad the index to the width of the number of entries
            template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                # Width comes first, as in the WIDTHxHEIGHT form above
                template_dict['resolution'] = '%dx?' % template_dict['width']

        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id'))
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None)
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
        tmpl = compat_expanduser(outtmpl)
        filename = tmpl % template_dict
        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
586
def _match_entry(self, info_dict, incomplete):
    """ Returns None iff the file should be downloaded, otherwise the
    reason for skipping it.  The 'match_filter' option is only
    consulted if incomplete is false. """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
            return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
            return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)

    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

    if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
        return 'Skipping "%s" because it is age restricted' % video_title
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title

    if incomplete:
        return None
    match_filter = self.params.get('match_filter')
    if match_filter is None:
        return None
    # The filter returns a message to skip the video, None to download it
    return match_filter(info_dict)
fe7e0c98 628
b6c45014
JMF
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info that it does not have yet'''
    for key, value in extra_info.items():
        if key not in info_dict:
            info_dict[key] = value
634
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result
    (None means no extra values).
    ie_key restricts the extraction to the extractor with that key; if it
    is not given and force_generic_extractor is true, 'Generic' is used.
    If 'process', the result of process_ie_result is returned, otherwise
    the extractor result itself.
    Returns None if the extractor returned nothing or an error was
    reported.
    '''
    if extra_info is None:
        # A fresh dict per call instead of a shared mutable default
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # Only reached when no extractor was suitable (loop not broken)
        self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 687
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Add the extractor and webpage URL fields to ie_result where missing."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
695
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    ie_result  -- dict returned by an extractor; its '_type' key (default
                  'video') selects how it is resolved.
    download   -- passed on to the video/URL processing calls.
    extra_info -- dict of additional fields handed to add_extra_info /
                  extract_info; an empty dict is used when omitted.

    Raises Exception when '_type' is not one of the handled kinds.
    """
    # Avoid a shared mutable default argument
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # Every non-None field of the embedding result (except the
        # reference itself) overrides the extracted information
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        assert new_result.get('_type') != 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist' or result_type == 'multi_video':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items', None)
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(spec):
                # 'a-b' segments expand to the inclusive range, other
                # segments are single 1-based item numbers
                for string_segment in spec.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = iter_playlistitems(playlistitems_str)

        def select_playlistitems(all_entries):
            # Keep only the requested items that exist in the sequence, so
            # that out-of-range item numbers are ignored instead of raising
            # IndexError
            count = len(all_entries)
            return [
                all_entries[i - 1] for i in playlistitems
                if -count <= i - 1 < count]

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = select_playlistitems(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))
        else:  # iterable
            if playlistitems:
                # Same bounds handling as for plain lists
                entries = select_playlistitems(list(ie_entries))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Propagate the extractor metadata of the container to the entry
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
859
67134eab
JMF
860 def _build_format_filter(self, filter_spec):
861 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
862
863 OPERATORS = {
864 '<': operator.lt,
865 '<=': operator.le,
866 '>': operator.gt,
867 '>=': operator.ge,
868 '=': operator.eq,
869 '!=': operator.ne,
870 }
67134eab 871 operator_rex = re.compile(r'''(?x)\s*
2ec19e95 872 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
083c9df9
PH
873 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
874 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
67134eab 875 $
083c9df9 876 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
67134eab 877 m = operator_rex.search(filter_spec)
9ddb6925
S
878 if m:
879 try:
880 comparison_value = int(m.group('value'))
881 except ValueError:
882 comparison_value = parse_filesize(m.group('value'))
883 if comparison_value is None:
884 comparison_value = parse_filesize(m.group('value') + 'B')
885 if comparison_value is None:
886 raise ValueError(
887 'Invalid value %r in format specification %r' % (
67134eab 888 m.group('value'), filter_spec))
9ddb6925
S
889 op = OPERATORS[m.group('op')]
890
083c9df9 891 if not m:
9ddb6925
S
892 STR_OPERATORS = {
893 '=': operator.eq,
894 '!=': operator.ne,
895 }
67134eab 896 str_operator_rex = re.compile(r'''(?x)
9ddb6925
S
897 \s*(?P<key>ext|acodec|vcodec|container|protocol)
898 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
899 \s*(?P<value>[a-zA-Z0-9_-]+)
67134eab 900 \s*$
9ddb6925 901 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
67134eab 902 m = str_operator_rex.search(filter_spec)
9ddb6925
S
903 if m:
904 comparison_value = m.group('value')
905 op = STR_OPERATORS[m.group('op')]
083c9df9 906
9ddb6925 907 if not m:
67134eab 908 raise ValueError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
909
910 def _filter(f):
911 actual_value = f.get(m.group('key'))
912 if actual_value is None:
913 return m.group('none_inclusive')
914 return op(actual_value, comparison_value)
67134eab
JMF
915 return _filter
916
def build_format_selector(self, format_spec):
    """
    Compile a format specification string into a selector function.

    The specification is tokenized with the Python tokenizer and parsed into
    a tree of selectors: single names ('best', 'bestvideo', an extension or a
    format_id), alternatives separated by '/', several selections separated
    by ',', merges written as 'video+audio', groups in parentheses and
    filters in square brackets (see _build_format_filter).

    Returns a function that takes the list of available formats and returns
    an iterable of the selected format dicts. A merge whose first format
    carries no video is reported through report_error and left out of the
    result instead of being yielded as None.

    Raises SyntaxError for a malformed specification.
    """
    def syntax_error(note, start):
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    def _parse_filter(tokens):
        # Collect the raw text up to (excluding) the closing bracket
        filter_parts = []
        for tok_type, string, start, _, _ in tokens:
            if tok_type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for tok_type, string, start, end, line in tokens:
            if tok_type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                # (the inner loop keeps consuming the same token iterator)
                for tok_type, string, start, end, line in tokens:
                    yield tok_type, string, start, end, line
                    if tok_type == tokenize.OP and string == ']':
                        break
            elif tok_type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
            elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                    last_line = line
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        selectors = []
        current_selector = None
        for tok_type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if tok_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif tok_type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    # These end the audio part of a merge; let the caller see them
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    video_selector = current_selector
                    audio_selector = _parse_format_selection(tokens, inside_merge=True)
                    if not video_selector or not audio_selector:
                        raise syntax_error('"+" must be between two format selectors', start)
                    current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif tok_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _build_selector_function(selector):
        if isinstance(selector, list):
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(formats):
                for f in fs:
                    for fmt in f(formats):
                        yield fmt
            return selector_function
        elif selector.type == GROUP:
            selector_function = _build_selector_function(selector.selector)
        elif selector.type == PICKFIRST:
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(formats):
                # The first alternative that selects anything wins
                for f in fs:
                    picked_formats = list(f(formats))
                    if picked_formats:
                        return picked_formats
                return []
        elif selector.type == SINGLE:
            spec = selector.selector

            def selector_function(formats):
                formats = list(formats)
                if not formats:
                    return
                if spec == 'all':
                    for f in formats:
                        yield f
                elif spec in ['best', 'worst', None]:
                    format_idx = 0 if spec == 'worst' else -1
                    audiovideo_formats = [
                        f for f in formats
                        if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                    if audiovideo_formats:
                        yield audiovideo_formats[format_idx]
                    # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                    elif (all(f.get('acodec') != 'none' for f in formats) or
                          all(f.get('vcodec') != 'none' for f in formats)):
                        yield formats[format_idx]
                elif spec == 'bestaudio':
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[-1]
                elif spec == 'worstaudio':
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[0]
                elif spec == 'bestvideo':
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[-1]
                elif spec == 'worstvideo':
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[0]
                else:
                    # Either a known extension or an explicit format_id
                    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                    if spec in extensions:
                        filter_f = lambda f: f['ext'] == spec
                    else:
                        filter_f = lambda f: f['format_id'] == spec
                    matches = list(filter(filter_f, formats))
                    if matches:
                        yield matches[-1]
        elif selector.type == MERGE:
            def _merge(formats_info):
                format_1, format_2 = [f['format_id'] for f in formats_info]
                # The first format must contain the video and the
                # second the audio
                if formats_info[0].get('vcodec') == 'none':
                    self.report_error('The first format must '
                                      'contain the video, try using '
                                      '"-f %s+%s"' % (format_2, format_1))
                    return
                output_ext = (
                    formats_info[0]['ext']
                    if self.params.get('merge_output_format') is None
                    else self.params['merge_output_format'])
                return {
                    'requested_formats': formats_info,
                    'format': '%s+%s' % (formats_info[0].get('format'),
                                         formats_info[1].get('format')),
                    'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                            formats_info[1].get('format_id')),
                    'width': formats_info[0].get('width'),
                    'height': formats_info[0].get('height'),
                    'resolution': formats_info[0].get('resolution'),
                    'fps': formats_info[0].get('fps'),
                    'vcodec': formats_info[0].get('vcodec'),
                    'vbr': formats_info[0].get('vbr'),
                    'stretched_ratio': formats_info[0].get('stretched_ratio'),
                    'acodec': formats_info[1].get('acodec'),
                    'abr': formats_info[1].get('abr'),
                    'ext': output_ext,
                }
            video_selector, audio_selector = map(_build_selector_function, selector.selector)

            def selector_function(formats):
                formats = list(formats)
                for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                    merged = _merge(pair)
                    # _merge gives None for an invalid pair once report_error
                    # has returned; never hand that on as a "format"
                    if merged is not None:
                        yield merged

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(formats):
            # Apply the bracket filters before the actual selection
            for _filter in filters:
                formats = list(filter(_filter, formats))
            return selector_function(formats)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        """Iterator over a token list that can step back by one token."""

        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 1173
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """
    Return the HTTP headers to use for info_dict.

    Starts from a copy of std_headers, overlays the 'http_headers' entry of
    info_dict (if any) and sets 'Cookie' when _calc_cookies returns a
    non-empty value.
    """
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookie_value = self._calc_cookies(info_dict)
    if cookie_value:
        headers['Cookie'] = cookie_value

    return headers
1186
1187 def _calc_cookies(self, info_dict):
662435f7 1188 pr = compat_urllib_request.Request(info_dict['url'])
e5660ee6 1189 self.cookiejar.add_cookie_header(pr)
662435f7 1190 return pr.get_header('Cookie')
e5660ee6 1191
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """
    Normalize a single video result, select the requested formats and
    (optionally) download them.

    info_dict -- extractor result of type 'video'; it is completed in place
                 (playlist fields, thumbnails, display_id, upload_date,
                 requested_subtitles, per-format defaults and HTTP headers).
    download  -- when true, process_info is called for every selected format.

    Returns info_dict updated with the last selected format, or None when
    one of the listing options (listsubtitles, listformats, list_thumbnails)
    is active.

    Raises ExtractorError when 'id' or 'title' is missing, when there are no
    formats, when a format lacks 'url' or when the requested format is not
    available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        # Missing values are replaced by sortable defaults: comparing None
        # with numbers raises TypeError on Python 3
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '',
            t.get('url')))
        for i, t in enumerate(thumbnails):
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                t['id'] = '%d' % i

    if thumbnails and 'thumbnail' not in info_dict:
        # After sorting, the last thumbnail is the preferred one
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
        self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
        return
    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], info_dict.get('subtitles'),
        info_dict.get('automatic_captions'))

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if 'url' not in fmt:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        format_id = fmt['format_id']
        if format_id not in formats_dict:
            formats_dict[format_id] = []
        formats_dict[format_id].append(fmt)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, fmt in enumerate(ambiguous_formats):
                fmt['format_id'] = '%s-%d' % (format_id, i)

    for i, fmt in enumerate(formats):
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        # Default: prefer merged best video + best audio for selected
        # extractors when a working merger is available, else 'best'
        req_format_list = []
        if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                info_dict['extractor'] in ['youtube', 'ted'] and
                not info_dict.get('is_live')):
            merger = FFmpegMergerPP(self)
            if merger.available and merger.can_merge():
                req_format_list.append('bestvideo+bestaudio')
        req_format_list.append('best')
        req_format = '/'.join(req_format_list)
    format_selector = self.build_format_selector(req_format)
    formats_to_download = list(format_selector(formats))
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
1332
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """
    Select the requested subtitles and their format

    video_id           -- only used in warning messages.
    normal_subtitles   -- dict mapping language to a list of subtitle format
                          dicts (each with an 'ext' key), or None; considered
                          when the 'writesubtitles' param is set.
    automatic_captions -- same shape; considered when 'writeautomaticsub' is
                          set, for languages without normal subtitles.

    Returns a dict mapping each selected language to the chosen format dict,
    or None when nothing was requested or nothing is available. Languages
    that are missing or have an empty format list are skipped with a warning.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    # Nothing is collected unless at least one of the two options is set,
    # so an empty dict also covers the "not requested" case
    if not available_subs:
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = available_subs.keys()
    elif self.params.get('subtitleslangs', False):
        requested_langs = self.params.get('subtitleslangs')
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [list(available_subs.keys())[0]]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        if not formats:
            # Missing language or empty format list: nothing to pick from
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [sub for sub in formats if sub['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # No preferred format matched: fall back to the last one
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
1381
8222d8de
JMF
1382 def process_info(self, info_dict):
1383 """Process a single resolved IE result."""
1384
1385 assert info_dict.get('_type', 'video') == 'video'
fd288278
PH
1386
1387 max_downloads = self.params.get('max_downloads')
1388 if max_downloads is not None:
1389 if self._num_downloads >= int(max_downloads):
1390 raise MaxDownloadsReached()
8222d8de
JMF
1391
1392 info_dict['fulltitle'] = info_dict['title']
1393 if len(info_dict['title']) > 200:
6febd1c1 1394 info_dict['title'] = info_dict['title'][:197] + '...'
8222d8de 1395
11b85ce6 1396 if 'format' not in info_dict:
8222d8de
JMF
1397 info_dict['format'] = info_dict['ext']
1398
442c37b7 1399 reason = self._match_entry(info_dict, incomplete=False)
8222d8de 1400 if reason is not None:
6febd1c1 1401 self.to_screen('[download] ' + reason)
8222d8de
JMF
1402 return
1403
fd288278 1404 self._num_downloads += 1
8222d8de 1405
e72c7e41 1406 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
8222d8de
JMF
1407
1408 # Forced printings
1409 if self.params.get('forcetitle', False):
0783b09b 1410 self.to_stdout(info_dict['fulltitle'])
8222d8de 1411 if self.params.get('forceid', False):
0783b09b 1412 self.to_stdout(info_dict['id'])
8222d8de 1413 if self.params.get('forceurl', False):
16ae61f6 1414 if info_dict.get('requested_formats') is not None:
1415 for f in info_dict['requested_formats']:
1416 self.to_stdout(f['url'] + f.get('play_path', ''))
1417 else:
1418 # For RTMP URLs, also include the playpath
1419 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
216d71d0 1420 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
0783b09b 1421 self.to_stdout(info_dict['thumbnail'])
216d71d0 1422 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
0783b09b 1423 self.to_stdout(info_dict['description'])
8222d8de 1424 if self.params.get('forcefilename', False) and filename is not None:
0783b09b 1425 self.to_stdout(filename)
525ef922
PH
1426 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1427 self.to_stdout(formatSeconds(info_dict['duration']))
8222d8de 1428 if self.params.get('forceformat', False):
0783b09b 1429 self.to_stdout(info_dict['format'])
9d153818 1430 if self.params.get('forcejson', False):
0783b09b 1431 self.to_stdout(json.dumps(info_dict))
8222d8de
JMF
1432
1433 # Do nothing else if in simulate mode
1434 if self.params.get('simulate', False):
1435 return
1436
1437 if filename is None:
1438 return
1439
1440 try:
e5a11a22 1441 dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
d26e981d 1442 if dn and not os.path.exists(dn):
8222d8de
JMF
1443 os.makedirs(dn)
1444 except (OSError, IOError) as err:
6febd1c1 1445 self.report_error('unable to create directory ' + compat_str(err))
8222d8de
JMF
1446 return
1447
1448 if self.params.get('writedescription', False):
2699da80 1449 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
7b6fefc9 1450 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
6febd1c1 1451 self.to_screen('[info] Video description is already present')
f00fd51d
JMF
1452 elif info_dict.get('description') is None:
1453 self.report_warning('There\'s no description to write.')
7b6fefc9
PH
1454 else:
1455 try:
6febd1c1 1456 self.to_screen('[info] Writing video description to: ' + descfn)
7b6fefc9
PH
1457 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1458 descfile.write(info_dict['description'])
7b6fefc9 1459 except (OSError, IOError):
6febd1c1 1460 self.report_error('Cannot write description file ' + descfn)
7b6fefc9 1461 return
8222d8de 1462
1fb07d10 1463 if self.params.get('writeannotations', False):
98727e12 1464 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
7b6fefc9 1465 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
6febd1c1 1466 self.to_screen('[info] Video annotations are already present')
7b6fefc9
PH
1467 else:
1468 try:
6febd1c1 1469 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
1470 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1471 annofile.write(info_dict['annotations'])
1472 except (KeyError, TypeError):
6febd1c1 1473 self.report_warning('There are no annotations to write.')
7b6fefc9 1474 except (OSError, IOError):
6febd1c1 1475 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 1476 return
1fb07d10 1477
c4a91be7 1478 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 1479 self.params.get('writeautomaticsub')])
c4a91be7 1480
c84dd8a9 1481 if subtitles_are_requested and info_dict.get('requested_subtitles'):
8222d8de
JMF
1482 # subtitles download errors are already managed as troubles in relevant IE
1483 # that way it will silently go on when used with unsupporting IE
c84dd8a9 1484 subtitles = info_dict['requested_subtitles']
0f2c0d33 1485 ie = self.get_info_extractor(info_dict['extractor_key'])
a504ced0
JMF
1486 for sub_lang, sub_info in subtitles.items():
1487 sub_format = sub_info['ext']
1488 if sub_info.get('data') is not None:
1489 sub_data = sub_info['data']
1490 else:
1491 try:
0f2c0d33
JMF
1492 sub_data = ie._download_webpage(
1493 sub_info['url'], info_dict['id'], note=False)
1494 except ExtractorError as err:
a504ced0 1495 self.report_warning('Unable to download subtitle for "%s": %s' %
0f2c0d33 1496 (sub_lang, compat_str(err.cause)))
a504ced0 1497 continue
8222d8de 1498 try:
d4051a8e 1499 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
7b6fefc9 1500 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
6febd1c1 1501 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
7b6fefc9 1502 else:
6febd1c1 1503 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
7b6fefc9 1504 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
a504ced0 1505 subfile.write(sub_data)
8222d8de 1506 except (OSError, IOError):
e4db1951 1507 self.report_error('Cannot write subtitles file ' + sub_filename)
8222d8de
JMF
1508 return
1509
8222d8de 1510 if self.params.get('writeinfojson', False):
b29e0000 1511 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
7b6fefc9 1512 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
6febd1c1 1513 self.to_screen('[info] Video description metadata is already present')
7b6fefc9 1514 else:
6febd1c1 1515 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
7b6fefc9 1516 try:
cb202fd2 1517 write_json_file(self.filter_requested_info(info_dict), infofn)
7b6fefc9 1518 except (OSError, IOError):
6febd1c1 1519 self.report_error('Cannot write metadata to JSON file ' + infofn)
7b6fefc9 1520 return
8222d8de 1521
ec82d85a 1522 self._write_thumbnails(info_dict, filename)
8222d8de
JMF
1523
1524 if not self.params.get('skip_download', False):
4340deca
P
1525 try:
1526 def dl(name, info):
a055469f 1527 fd = get_suitable_downloader(info, self.params)(self, self.params)
4340deca
P
1528 for ph in self._progress_hooks:
1529 fd.add_progress_hook(ph)
1530 if self.params.get('verbose'):
1531 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1532 return fd.download(name, info)
ee69b99a 1533
4340deca
P
1534 if info_dict.get('requested_formats') is not None:
1535 downloaded = []
1536 success = True
d47aeb22 1537 merger = FFmpegMergerPP(self)
f740fae2 1538 if not merger.available:
4340deca
P
1539 postprocessors = []
1540 self.report_warning('You have requested multiple '
1541 'formats but ffmpeg or avconv are not installed.'
4a5a898a 1542 ' The formats won\'t be merged.')
6350728b 1543 else:
4340deca 1544 postprocessors = [merger]
81cd954a
S
1545
1546 def compatible_formats(formats):
1547 video, audio = formats
1548 # Check extension
1549 video_ext, audio_ext = audio.get('ext'), video.get('ext')
1550 if video_ext and audio_ext:
1551 COMPATIBLE_EXTS = (
6728187a 1552 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
81cd954a
S
1553 ('webm')
1554 )
1555 for exts in COMPATIBLE_EXTS:
1556 if video_ext in exts and audio_ext in exts:
1557 return True
1558 # TODO: Check acodec/vcodec
1559 return False
1560
38c6902b
S
1561 filename_real_ext = os.path.splitext(filename)[1][1:]
1562 filename_wo_ext = (
1563 os.path.splitext(filename)[0]
1564 if filename_real_ext == info_dict['ext']
1565 else filename)
81cd954a 1566 requested_formats = info_dict['requested_formats']
c0dea0a7 1567 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
38c6902b 1568 info_dict['ext'] = 'mkv'
4a5a898a
S
1569 self.report_warning(
1570 'Requested formats are incompatible for merge and will be merged into mkv.')
38c6902b
S
1571 # Ensure filename always has a correct extension for successful merge
1572 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
5b5fbc08
JMF
1573 if os.path.exists(encodeFilename(filename)):
1574 self.to_screen(
1575 '[download] %s has already been downloaded and '
1576 'merged' % filename)
1577 else:
81cd954a 1578 for f in requested_formats:
5b5fbc08
JMF
1579 new_info = dict(info_dict)
1580 new_info.update(f)
1581 fname = self.prepare_filename(new_info)
666a9a2b 1582 fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
5b5fbc08
JMF
1583 downloaded.append(fname)
1584 partial_success = dl(fname, new_info)
1585 success = success and partial_success
1586 info_dict['__postprocessors'] = postprocessors
1587 info_dict['__files_to_merge'] = downloaded
4340deca
P
1588 else:
1589 # Just a single file
1590 success = dl(filename, info_dict)
1591 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1592 self.report_error('unable to download video data: %s' % str(err))
1593 return
1594 except (OSError, IOError) as err:
1595 raise UnavailableVideoError(err)
1596 except (ContentTooShortError, ) as err:
1597 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1598 return
8222d8de
JMF
1599
1600 if success:
6271f1ca 1601 # Fixup content
62cd676c
PH
1602 fixup_policy = self.params.get('fixup')
1603 if fixup_policy is None:
1604 fixup_policy = 'detect_or_warn'
1605
6271f1ca
PH
1606 stretched_ratio = info_dict.get('stretched_ratio')
1607 if stretched_ratio is not None and stretched_ratio != 1:
6271f1ca
PH
1608 if fixup_policy == 'warn':
1609 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1610 info_dict['id'], stretched_ratio))
1611 elif fixup_policy == 'detect_or_warn':
1612 stretched_pp = FFmpegFixupStretchedPP(self)
1613 if stretched_pp.available:
1614 info_dict.setdefault('__postprocessors', [])
1615 info_dict['__postprocessors'].append(stretched_pp)
1616 else:
1617 self.report_warning(
1618 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1619 info_dict['id'], stretched_ratio))
1620 else:
62cd676c
PH
1621 assert fixup_policy in ('ignore', 'never')
1622
1623 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1624 if fixup_policy == 'warn':
1625 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1626 info_dict['id']))
1627 elif fixup_policy == 'detect_or_warn':
1628 fixup_pp = FFmpegFixupM4aPP(self)
1629 if fixup_pp.available:
1630 info_dict.setdefault('__postprocessors', [])
1631 info_dict['__postprocessors'].append(fixup_pp)
1632 else:
1633 self.report_warning(
1634 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1635 info_dict['id']))
1636 else:
1637 assert fixup_policy in ('ignore', 'never')
6271f1ca 1638
8222d8de
JMF
1639 try:
1640 self.post_process(filename, info_dict)
1641 except (PostProcessingError) as err:
6febd1c1 1642 self.report_error('postprocessing: %s' % str(err))
8222d8de 1643 return
cd58dc3e 1644 self.record_download_archive(info_dict)
8222d8de
JMF
1645
def download(self, url_list):
    """Download a given list of URLs.

    Every URL is passed to self.extract_info(), which also performs the
    download itself. When the 'dump_single_json' parameter is set, the
    extraction result is printed as JSON through self.to_stdout().

    Returns self._download_retcode.

    Raises SameFileError when more than one URL is given, the output
    template contains no '%' field and 'max_downloads' is not 1.
    MaxDownloadsReached is reported and then re-raised.
    """
    template = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    # Without a '%' field every URL would be written to the same file
    if not (len(url_list) <= 1 or
            '%' in template or
            self.params.get('max_downloads') == 1):
        raise SameFileError(template)

    for url in url_list:
        try:
            # extract_info() downloads the videos as well
            force_generic = self.params.get('force_generic_extractor', False)
            result = self.extract_info(
                url, force_generic_extractor=force_generic)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        else:
            if self.params.get('dump_single_json', False):
                self.to_stdout(json.dumps(result))

    return self._download_retcode
1669
def download_with_info_file(self, info_filename):
    """Download the video described by a previously written JSON info file.

    The file is read as UTF-8, passed through self.filter_requested_info()
    and handed to self.process_ie_result(). If that raises DownloadError
    and the info contains a 'webpage_url', the download is retried from
    that URL through self.download(); otherwise the error is re-raised.

    Returns self._download_retcode (or the result of self.download() on
    the fallback path).
    """
    info_file = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(info_file) as lines:
        # FileInput has no read() method, so json.load() cannot be used
        raw_json = '\n'.join(lines)
        info = self.filter_requested_info(json.loads(raw_json))

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 1686
cb202fd2
S
@staticmethod
def filter_requested_info(info_dict):
    """Return a shallow copy of info_dict without the 'requested_formats'
    and 'requested_subtitles' entries."""
    dropped_keys = ('requested_formats', 'requested_subtitles')
    filtered = {}
    for key, value in info_dict.items():
        if key in dropped_keys:
            continue
        filtered[key] = value
    return filtered
1692
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The postprocessors listed under ie_info['__postprocessors'] (if any)
    run first, followed by those in self._pps. Each one receives the info
    returned by the previous one, starting from a copy of ie_info with
    'filepath' set to filename. Files a postprocessor reports as obsolete
    are removed unless the 'keepvideo' parameter is set.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    extra_pps = ie_info.get('__postprocessors')
    chain = list(extra_pps) if extra_pps is not None else []
    chain += self._pps

    for pp in chain:
        obsolete_files = []
        try:
            obsolete_files, info = pp.run(info)
        except PostProcessingError as e:
            # Report and carry on with the next postprocessor
            self.report_error(e.msg)
        if not obsolete_files or self.params.get('keepvideo', False):
            continue
        for old_filename in obsolete_files:
            self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
            try:
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
c1c9a79c 1714
5db07df6
PH
1715 def _make_archive_id(self, info_dict):
1716 # Future-proof against any change in case
1717 # and backwards compatibility with prior versions
d31209a1 1718 extractor = info_dict.get('extractor_key')
7012b23c
PH
1719 if extractor is None:
1720 if 'id' in info_dict:
1721 extractor = info_dict.get('ie_key') # key in a playlist
1722 if extractor is None:
5db07df6 1723 return None # Incomplete video information
6febd1c1 1724 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
1725
def in_download_archive(self, info_dict):
    """Return True if the video described by info_dict is recorded in the
    download archive file.

    Returns False when no 'download_archive' parameter is configured, when
    the video information is incomplete, or when the archive file does not
    exist. Other IOErrors are propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    if archive_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(entry.strip() == archive_id for entry in archive_file):
                return True
    except IOError as ioe:
        # Only a missing archive file is tolerated
        if ioe.errno != errno.ENOENT:
            raise
    return False
1744
def record_download_archive(self, info_dict):
    """Append the archive identifier of info_dict to the download archive.

    Does nothing when no 'download_archive' parameter is configured.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        archive_id = self._make_archive_id(info_dict)
        assert archive_id
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + '\n')
dd82ffea 1753
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    Checked in order:
      * 'audio only' when 'vcodec' is 'none'
      * the explicit 'resolution' value, if present
      * '<width>x<height>' when both dimensions are known
      * '<height>p' when only the height is known
      * '<width>x?' when only the width is known
      * default otherwise
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        # Width comes first in WxH notation; the unknown height is the '?'
        return '%sx?' % width
    return default
1770
c57f7757
PH
1771 def _format_note(self, fdict):
1772 res = ''
1773 if fdict.get('ext') in ['f4f', 'f4m']:
1774 res += '(unsupported) '
1775 if fdict.get('format_note') is not None:
1776 res += fdict['format_note'] + ' '
1777 if fdict.get('tbr') is not None:
1778 res += '%4dk ' % fdict['tbr']
1779 if fdict.get('container') is not None:
1780 if res:
1781 res += ', '
1782 res += '%s container' % fdict['container']
1783 if (fdict.get('vcodec') is not None and
1784 fdict.get('vcodec') != 'none'):
1785 if res:
1786 res += ', '
1787 res += fdict['vcodec']
91c7271a 1788 if fdict.get('vbr') is not None:
c57f7757
PH
1789 res += '@'
1790 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1791 res += 'video@'
1792 if fdict.get('vbr') is not None:
1793 res += '%4dk' % fdict['vbr']
fbb21cf5
PH
1794 if fdict.get('fps') is not None:
1795 res += ', %sfps' % fdict['fps']
c57f7757
PH
1796 if fdict.get('acodec') is not None:
1797 if res:
1798 res += ', '
1799 if fdict['acodec'] == 'none':
1800 res += 'video only'
1801 else:
1802 res += '%-5s' % fdict['acodec']
1803 elif fdict.get('abr') is not None:
1804 if res:
1805 res += ', '
1806 res += 'audio'
1807 if fdict.get('abr') is not None:
1808 res += '@%3dk' % fdict['abr']
1809 if fdict.get('asr') is not None:
1810 res += ' (%5dHz)' % fdict['asr']
1811 if fdict.get('filesize') is not None:
1812 if res:
1813 res += ', '
1814 res += format_bytes(fdict['filesize'])
9732d77e
PH
1815 elif fdict.get('filesize_approx') is not None:
1816 if res:
1817 res += ', '
1818 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 1819 return res
91c7271a 1820
def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    Uses info_dict['formats'] if present, otherwise treats info_dict
    itself as the only format. Formats with a 'preference' below -1000
    are left out. When several formats exist, the last listed row is
    tagged '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    # The table is empty if every format was filtered out above; in that
    # case there is no row to tag
    if table and len(formats) > 1:
        table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
cfb56d1a
PH
1834
def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails known for a video.

    Falls back to the single 'thumbnail' URL when info_dict has no
    'thumbnails' list, and prints a notice when neither is available.
    """
    video_id = info_dict['id']
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        single_url = info_dict.get('thumbnail')
        if not single_url:
            self.to_screen(
                '[info] No thumbnails present for %s' % video_id)
            return
        thumbnails = [{'id': '0', 'url': single_url}]

    self.to_screen('[info] Thumbnails for %s:' % video_id)
    rows = []
    for t in thumbnails:
        rows.append([
            t['id'],
            t.get('width', 'unknown'),
            t.get('height', 'unknown'),
            t['url'],
        ])
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
dca08720 1851
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the languages and formats available in a subtitles mapping.

    subtitles maps a language to a list of format dicts, each with an
    'ext' key; name is the label used in the printed messages.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen('Available %s for %s:' % (name, video_id))
    rows = []
    for lang, formats in subtitles.items():
        # Extensions are listed in reverse order of the formats list
        exts = ', '.join(f['ext'] for f in reversed(formats))
        rows.append([lang, exts])
    self.to_screen(render_table(['Language', 'formats'], rows))
a504ced0 1862
dca08720
PH
def urlopen(self, req):
    """Start an HTTP download.

    Opens req through self._opener using self._socket_timeout as the
    timeout and returns whatever the opener returns.
    """
    opener = self._opener
    timeout = self._socket_timeout
    return opener.open(req, timeout=timeout)
dca08720
PH
1866
def print_debug_header(self):
    """Write diagnostic information when the 'verbose' parameter is set.

    Reports the encodings in use, the program version, the git HEAD of the
    source directory (best effort), the Python version and platform, the
    versions of external executables and the proxy map. With 'call_home'
    set, it also queries yt-dl.org for the public IP address and the
    latest version, warning when the running version is older.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    out_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encodings = (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        out_encoding,
        self.get_encoding(),
    )
    write_string(
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % encodings,
        encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        git = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=module_dir)
        git_out, _git_err = git.communicate()
        head = git_out.decode().strip()
        if re.match('[0-9a-f]+', head):
            self._write_string('[debug] Git HEAD: ' + head + '\n')
    except Exception:
        # Best effort only: any failure to query git is ignored.
        # sys.exc_clear() is not available on every Python version.
        try:
            sys.exc_clear()
        except Exception:
            pass

    python_info = (platform.python_version(), platform_name())
    self._write_string('[debug] Python version %s - %s\n' % python_info)

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    # Only executables with a known version are listed
    exe_str = ', '.join(
        '%s %s' % (exe, version)
        for exe, version in sorted(exe_versions.items())
        if version
    ) or 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if not hasattr(handler, 'proxies'):
            continue
        proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if not self.params.get('call_home', False):
        return

    ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
    self._write_string('[debug] Public IP address: %s\n' % ipaddr)
    latest_version = self.urlopen(
        'https://yt-dl.org/latest/version').read().decode('utf-8')
    if version_tuple(latest_version) > version_tuple(__version__):
        self.report_warning(
            'You are using an outdated version (newest version: %s)! '
            'See https://yt-dl.org/update if you need help updating.' %
            latest_version)
1931
def _setup_opener(self):
    """Build the URL opener used for all requests and store it in
    self._opener.

    Also sets self._socket_timeout (600 unless the 'socket_timeout'
    parameter is given) and self.cookiejar (loaded from the 'cookiefile'
    parameter when that file is readable). Proxies come from the 'proxy'
    parameter; an empty string disables proxying, and when the parameter
    is absent the proxies reported by getproxies() are used.
    """
    params = self.params

    timeout = params.get('socket_timeout')
    self._socket_timeout = float(timeout) if timeout is not None else 600

    cookiefile = params.get('cookiefile')
    proxy = params.get('proxy')

    if cookiefile is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookiefile)
        if os.access(cookiefile, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()
    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy == '':
        proxies = {}
    else:
        proxies = {'http': proxy, 'https': proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 0
    if params.get('debug_printtraffic'):
        debuglevel = 1
    https_handler = make_HTTPS_handler(params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(params, debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
1971
def encode(self, s):
    """Return s as bytes, encoded with the encoding from get_encoding().

    Byte strings are returned unchanged. A UnicodeEncodeError is re-raised
    with a hint about the encoding configuration appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded
1981
def get_encoding(self):
    """Return the 'encoding' parameter, or preferredencoding() when that
    parameter is not set."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a
PH
1987
1988 def _write_thumbnails(self, info_dict, filename):
1989 if self.params.get('writethumbnail', False):
1990 thumbnails = info_dict.get('thumbnails')
1991 if thumbnails:
1992 thumbnails = [thumbnails[-1]]
1993 elif self.params.get('write_all_thumbnails', False):
1994 thumbnails = info_dict.get('thumbnails')
1995 else:
1996 return
1997
1998 if not thumbnails:
1999 # No thumbnails present, so return immediately
2000 return
2001
2002 for t in thumbnails:
2003 thumb_ext = determine_ext(t['url'], 'jpg')
2004 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2005 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
82245a6d 2006 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
ec82d85a
PH
2007
2008 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2009 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2010 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2011 else:
2012 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2013 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2014 try:
2015 uf = self.urlopen(t['url'])
d3d89c32 2016 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a
PH
2017 shutil.copyfileobj(uf, thumbf)
2018 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2019 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2020 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2021 self.report_warning('Unable to download thumbnail "%s": %s' %
2022 (t['url'], compat_str(err)))