]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
Clarify that automatic subtitles are generated.
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
9d2ecdbc 8import datetime
c1c9a79c 9import errno
31bd3925 10import fileinput
8222d8de 11import io
b82f815f 12import itertools
8694c600 13import json
62fec3b2 14import locale
083c9df9 15import operator
8222d8de 16import os
dca08720 17import platform
8222d8de
JMF
18import re
19import shutil
dca08720 20import subprocess
8222d8de
JMF
21import socket
22import sys
23import time
67134eab 24import tokenize
8222d8de
JMF
25import traceback
26
1e5b9a95
PH
27if os.name == 'nt':
28 import ctypes
29
8c25f81b 30from .compat import (
dca08720 31 compat_cookiejar,
4644ac55 32 compat_expanduser,
003c69a8 33 compat_get_terminal_size,
ce02ed60 34 compat_http_client,
4f026faf 35 compat_kwargs,
ce02ed60 36 compat_str,
67134eab 37 compat_tokenize_tokenize,
ce02ed60
PH
38 compat_urllib_error,
39 compat_urllib_request,
8b172c2e 40 compat_urllib_request_DataHandler,
8c25f81b
PH
41)
42from .utils import (
ce02ed60
PH
43 ContentTooShortError,
44 date_from_str,
45 DateRange,
acd69589 46 DEFAULT_OUTTMPL,
ce02ed60
PH
47 determine_ext,
48 DownloadError,
49 encodeFilename,
50 ExtractorError,
02dbf93f 51 format_bytes,
525ef922 52 formatSeconds,
ce02ed60 53 locked_file,
dca08720 54 make_HTTPS_handler,
ce02ed60 55 MaxDownloadsReached,
b7ab0590 56 PagedList,
083c9df9 57 parse_filesize,
91410c9b 58 PerRequestProxyHandler,
ce02ed60 59 PostProcessingError,
dca08720 60 platform_name,
ce02ed60 61 preferredencoding,
cfb56d1a 62 render_table,
ce02ed60
PH
63 SameFileError,
64 sanitize_filename,
1bb5c511 65 sanitize_path,
e5660ee6 66 std_headers,
ce02ed60 67 subtitles_filename,
ce02ed60 68 UnavailableVideoError,
29eb5174 69 url_basename,
58b1f00d 70 version_tuple,
ce02ed60
PH
71 write_json_file,
72 write_string,
6a3f4c3f 73 YoutubeDLCookieProcessor,
dca08720 74 YoutubeDLHandler,
6350728b 75 prepend_extension,
b29e0000 76 replace_extension,
7d4111ed 77 args_to_str,
05900629 78 age_restricted,
ce02ed60 79)
a0e07d31 80from .cache import Cache
023fa8c4 81from .extractor import get_info_extractor, gen_extractors
3bc2ddcc 82from .downloader import get_suitable_downloader
4c83c967 83from .downloader.rtmp import rtmpdump_version
4f026faf 84from .postprocessor import (
62cd676c 85 FFmpegFixupM4aPP,
6271f1ca 86 FFmpegFixupStretchedPP,
4f026faf
PH
87 FFmpegMergerPP,
88 FFmpegPostProcessor,
89 get_postprocessor,
90)
dca08720 91from .version import __version__
8222d8de
JMF
92
93
94class YoutubeDL(object):
95 """YoutubeDL class.
96
97 YoutubeDL objects are the ones responsible for downloading the
98 actual video file and writing it to disk if the user has requested
99 it, among some other tasks. In most cases there should be one per
100 program. As, given a video URL, the downloader doesn't know how to
101 extract all the needed information, task that InfoExtractors do, it
102 has to pass the URL to one of them.
103
104 For this, YoutubeDL objects have a method that allows
105 InfoExtractors to be registered in a given order. When it is passed
106 a URL, the YoutubeDL object hands it to the first InfoExtractor it
107 finds that reports being able to handle it. The InfoExtractor extracts
108 all the information about the video or videos the URL refers to, and
109 YoutubeDL process the extracted information, possibly using a File
110 Downloader to download the video.
111
112 YoutubeDL objects accept a lot of parameters. In order not to saturate
113 the object constructor with arguments, it receives a dictionary of
114 options instead. These options are available through the params
115 attribute for the InfoExtractors to use. The YoutubeDL also
116 registers itself as the downloader in charge for the InfoExtractors
117 that are added to it, so this is a "mutual registration".
118
119 Available options:
120
121 username: Username for authentication purposes.
122 password: Password for authentication purposes.
180940e0 123 videopassword: Password for accessing a video.
8222d8de
JMF
124 usenetrc: Use netrc for authentication instead.
125 verbose: Print additional info to stdout.
126 quiet: Do not print messages to stdout.
ad8915b7 127 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
128 forceurl: Force printing final URL.
129 forcetitle: Force printing title.
130 forceid: Force printing ID.
131 forcethumbnail: Force printing thumbnail URL.
132 forcedescription: Force printing description.
133 forcefilename: Force printing final filename.
525ef922 134 forceduration: Force printing duration.
8694c600 135 forcejson: Force printing info_dict as JSON.
63e0be34
PH
136 dump_single_json: Force printing the info_dict of the whole playlist
137 (or video) as a single JSON line.
8222d8de 138 simulate: Do not download the video files.
d8600787 139 format: Video format code. See options.py for more information.
8222d8de
JMF
140 outtmpl: Template for output names.
141 restrictfilenames: Do not allow "&" and spaces in file names
142 ignoreerrors: Do not stop on download errors.
d22dec74 143 force_generic_extractor: Force downloader to use the generic extractor
8222d8de
JMF
144 nooverwrites: Prevent overwriting files.
145 playliststart: Playlist item to start at.
146 playlistend: Playlist item to end at.
c14e88f0 147 playlist_items: Specific indices of playlist to download.
ff815fe6 148 playlistreverse: Download playlist items in reverse order.
8222d8de
JMF
149 matchtitle: Download only matching titles.
150 rejecttitle: Reject downloads for matching titles.
8bf9319e 151 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
152 logtostderr: Log messages to stderr instead of stdout.
153 writedescription: Write the video description to a .description file
154 writeinfojson: Write the video description to a .info.json file
1fb07d10 155 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 156 writethumbnail: Write the thumbnail image to a file
ec82d85a 157 write_all_thumbnails: Write all thumbnail formats to files
8222d8de 158 writesubtitles: Write the video subtitles to a file
741dd8ea 159 writeautomaticsub: Write the automatically generated subtitles to a file
8222d8de 160 allsubtitles: Downloads all the subtitles of the video
0b7f3118 161 (requires writesubtitles or writeautomaticsub)
8222d8de 162 listsubtitles: Lists all available subtitles for the video
a504ced0 163 subtitlesformat: The format code for subtitles
aa6a10c4 164 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
165 keepvideo: Keep the video file after post-processing
166 daterange: A DateRange object, download only if the upload_date is in the range.
167 skip_download: Skip the actual download of the video file
c35f9e72 168 cachedir: Location of the cache files in the filesystem.
a0e07d31 169 False to disable filesystem cache.
47192f92 170 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
171 age_limit: An integer representing the user's age in years.
172 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
173 min_views: An integer representing the minimum view count the video
174 must have in order to not be skipped.
175 Videos without view count information are always
176 downloaded. None for no limit.
177 max_views: An integer representing the maximum view count.
178 Videos that are more popular than that are not
179 downloaded.
180 Videos without view count information are always
181 downloaded. None for no limit.
182 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
183 Videos already present in the file are not downloaded
184 again.
dca08720 185 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 186 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
187 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
188 At the moment, this is only supported by YouTube.
a1ee09e8 189 proxy: URL of the proxy server to use
91410c9b
PH
190 cn_verification_proxy: URL of the proxy to use for IP address verification
191 on Chinese sites. (Experimental)
e344693b 192 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
193 bidi_workaround: Work around buggy terminals without bidirectional text
194 support, using fribidi
a0ddb8a2 195 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 196 include_ads: Download ads as well
04b4d394
PH
197 default_search: Prepend this string if an input url is not valid.
198 'auto' for elaborate guessing
62fec3b2 199 encoding: Use this encoding instead of the system-specified.
e8ee972c 200 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
201 Pass in 'in_playlist' to only show this behavior for
202 playlist items.
4f026faf 203 postprocessors: A list of dictionaries, each with an entry
71b640cc
PH
204 * key: The name of the postprocessor. See
205 youtube_dl/postprocessor/__init__.py for a list.
4f026faf
PH
206 as well as any further keyword arguments for the
207 postprocessor.
71b640cc
PH
208 progress_hooks: A list of functions that get called on download
209 progress, with a dictionary with the entries
5cda4eda 210 * status: One of "downloading", "error", or "finished".
ee69b99a 211 Check this first and ignore unknown values.
71b640cc 212
5cda4eda 213 If status is one of "downloading", or "finished", the
ee69b99a
PH
214 following properties may also be present:
215 * filename: The final filename (always present)
5cda4eda 216 * tmpfilename: The filename we're currently writing to
71b640cc
PH
217 * downloaded_bytes: Bytes on disk
218 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
219 * total_bytes_estimate: Guess of the eventual file size,
220 None if unavailable.
221 * elapsed: The number of seconds since download started.
71b640cc
PH
222 * eta: The estimated time in seconds, None if unknown
223 * speed: The download speed in bytes/second, None if
224 unknown
5cda4eda
PH
225 * fragment_index: The counter of the currently
226 downloaded video fragment.
227 * fragment_count: The number of fragments (= individual
228 files that will be merged)
71b640cc
PH
229
230 Progress hooks are guaranteed to be called at least once
231 (with status "finished") if the download is successful.
45598f15 232 merge_output_format: Extension to use when merging formats.
6271f1ca
PH
233 fixup: Automatically correct known faults of the file.
234 One of:
235 - "never": do nothing
236 - "warn": only emit a warning
237 - "detect_or_warn": check whether we can do anything
62cd676c 238 about it, warn otherwise (default)
be4a824d 239 source_address: (Experimental) Client-side IP address to bind to.
8bfa7545
PH
240 call_home: Boolean, true iff we are allowed to contact the
241 youtube-dl servers for debugging.
5f0d813d 242 sleep_interval: Number of seconds to sleep before each download.
cfb56d1a
PH
243 listformats: Print an overview of available video formats and exit.
244 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
245 match_filter: A function that gets called with the info_dict of
246 every video.
247 If it returns a message, the video is ignored.
248 If it returns None, the video is downloaded.
249 match_filter_func in utils.py is one example for this.
7e5db8c9 250 no_color: Do not emit color codes in output.
71b640cc 251
85729c51
PH
252 The following options determine which downloader is picked:
253 external_downloader: Executable of the external downloader to call.
254 None or unset for standard (built-in) downloader.
255 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
fe7e0c98 256
8222d8de 257 The following parameters are not used by YoutubeDL itself, they are used by
c75f0b36 258 the downloader (see youtube_dl/downloader/common.py):
8222d8de 259 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 260 noresizebuffer, retries, continuedl, noprogress, consoletitle,
c75f0b36 261 xattr_set_filesize, external_downloader_args.
76b1bd67
JMF
262
263 The following options are used by the post processors:
264 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
265 otherwise prefer avconv.
f72b0a60
S
266 postprocessor_args: A list of additional command-line arguments for the
267 postprocessor.
8222d8de
JMF
268 """
269
270 params = None
271 _ies = []
272 _pps = []
273 _download_retcode = None
274 _num_downloads = None
275 _screen_file = None
276
def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    params is the dictionary of options described in the class docstring;
    None is treated as an empty dictionary.  If auto_init is true, the
    debug header is printed and the default InfoExtractors are registered.
    Post processors listed under 'postprocessors' and the hooks listed
    under 'progress_hooks' are registered as well.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Regular output goes to stderr only when 'logtostderr' is set.  A
    # conditional (rather than indexing a list with the flag) also copes
    # with None and other non-bool values.
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Try bidiv first and fall back to fribidi if it cannot be run
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # A missing executable only disables the workaround; any other
            # OS error is propagated.
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
            not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if isinstance(params.get('outtmpl'), bytes):
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        # Copy the definition so that removing 'key' does not alter the
        # dictionary supplied by the caller.
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)
353
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn about arguments that look like a short YouTube ID starting with a dash.

    An argument counts as suspicious when it is a dash followed by exactly
    ten characters out of [0-9A-Za-z_-].  If any is found, a warning is
    reported that shows the command line with those arguments moved behind
    a "--" separator.
    """
    suspicious_positions = []
    for position, arg in enumerate(argv):
        if re.match(r'^-[0-9A-Za-z_-]{10}$', arg):
            suspicious_positions.append(position)
    if not suspicious_positions:
        return

    regular_args = []
    short_ids = []
    for position, arg in enumerate(argv):
        if position in suspicious_positions:
            short_ids.append(arg)
        else:
            regular_args.append(arg)
    correct_argv = ['youtube-dl'] + regular_args + ['--'] + short_ids
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
369
8222d8de
JMF
def add_info_extractor(self, ie):
    """Register an InfoExtractor object at the end of the extractor list.

    The object is also stored in the instance index under its ie_key()
    and is told that this object is its downloader.
    """
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
375
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.

    When no instance is registered yet, a new one is created from the
    class looked up by ie_key, added to the extractor list and returned.
    """
    instance = self._ies_instances.get(ie_key)
    if instance is not None:
        return instance
    instance = get_info_extractor(ie_key)()
    self.add_info_extractor(instance)
    return instance
387
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Append every InfoExtractor returned by gen_extractors to the list
    """
    default_extractors = gen_extractors()
    for extractor in default_extractors:
        self.add_info_extractor(extractor)
394
8222d8de
JMF
def add_post_processor(self, pp):
    """Append a PostProcessor object to the chain and make this object its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
399
933605d7
JMF
def add_progress_hook(self, ph):
    """Register a progress hook (currently only for the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 403
1c088fa8 404 def _bidi_workaround(self, message):
5d681e96 405 if not hasattr(self, '_output_channel'):
1c088fa8
PH
406 return message
407
5d681e96 408 assert hasattr(self, '_output_process')
11b85ce6 409 assert isinstance(message, compat_str)
6febd1c1
PH
410 line_count = message.count('\n') + 1
411 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 412 self._output_process.stdin.flush()
6febd1c1 413 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 414 for _ in range(line_count))
6febd1c1 415 return res[:-len('\n')]
1c088fa8 416
def to_screen(self, message, skip_eol=False):
    """Print message via to_stdout, suppressing it in quiet mode."""
    outcome = self.to_stdout(message, skip_eol, check_quiet=True)
    return outcome
420
def _write_string(self, s, out=None):
    """Write s to out with write_string, using the 'encoding' option."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
734f90bb 423
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or hand it to the logger.

    If a 'logger' option is set, the message is passed to its debug()
    method and nothing is written.  Otherwise the message is written to
    the screen file, followed by a newline unless skip_eol is true.

    The 'quiet' option is only honoured when check_quiet is true; with the
    default check_quiet=False the message is written even in quiet mode.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    message = self._bidi_workaround(message)
    # A conditional instead of indexing a two-element list with the flag,
    # so that None or other non-bool values of skip_eol do not raise.
    terminator = '' if skip_eol else '\n'
    self._write_string(message + terminator, self._screen_file)
8222d8de
JMF
434
def to_stderr(self, message):
    """Print message to the error file, or pass it to the logger's error()."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    converted = self._bidi_workaround(message)
    self._write_string(converted + '\n', self._err_file)
8222d8de 444
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' option is on.

    On Windows with a console window the title is set through kernel32;
    otherwise, if TERM is in the environment, an escape sequence is
    written to the screen file.
    """
    if not self.params.get('consoletitle', False):
        return
    has_windows_console = (
        os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow())
    if has_windows_console:
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        title = ctypes.c_wchar_p(message)
        ctypes.windll.kernel32.SetConsoleTitleW(title)
        return
    if 'TERM' in os.environ:
        sequence = '\033]0;%s\007' % message
        self._write_string(sequence, self._screen_file)
1e5b9a95 454
bdde425c
PH
def save_console_title(self):
    """Push the console title on the terminal's stack if 'consoletitle' is on."""
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
461
def restore_console_title(self):
    """Pop the console title from the terminal's stack if 'consoletitle' is on."""
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
468
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    self.save_console_title()
    return self
472
473 def __exit__(self, *args):
474 self.restore_console_title()
f89197d7 475
dca08720
PH
476 if self.params.get('cookiefile') is not None:
477 self.cookiejar.save()
bdde425c 478
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr first; in verbose mode a
    traceback is printed as well.  Unless the 'ignoreerrors' option is
    set, a DownloadError is then raised; otherwise the download return
    code is set to 1.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            active = sys.exc_info()
            if active[0]:  # .trouble has been called from an except block
                tb = ''
                # Include the traceback of a wrapped exception first
                if hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*active[1].exc_info))
                tb += compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    active = sys.exc_info()
    if active[0] and hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
        exc_info = active[1].exc_info
    else:
        exc_info = active
    raise DownloadError(message, exc_info)
508
def report_warning(self, message):
    '''
    Report a warning, prefixed with 'WARNING:'.

    With a 'logger' option the bare message goes to its warning() method.
    Otherwise nothing is printed if 'no_warnings' is set; else the
    prefixed message is printed to stderr, the prefix being colored when
    'no_color' is unset, the error file is a tty and os.name is not 'nt'.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return

    use_color = (
        not self.params.get('no_color') and
        self._err_file.isatty() and
        os.name != 'nt')
    header = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (header, message))
8222d8de
JMF
525
def report_error(self, message, tb=None):
    '''
    Call trouble with the message prefixed with 'ERROR:'.

    The prefix is colored in red when 'no_color' is unset, the error file
    is a tty and os.name is not 'nt'.
    '''
    use_color = (
        not self.params.get('no_color') and
        self._err_file.isatty() and
        os.name != 'nt')
    header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
537
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that the file has already been fully downloaded.

    If printing the file name raises UnicodeEncodeError, a generic
    message without the name is printed instead.
    """
    try:
        announcement = '[download] %s has already been downloaded' % file_name
        self.to_screen(announcement)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
8222d8de 544
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Generate the output filename.

    A copy of info_dict is extended with the template fields 'epoch',
    'autonumber', a zero-padded 'playlist_index' and, if missing,
    'resolution'.  Every value that is not None is sanitized, and fields
    missing from the result expand to 'NA'.  The 'outtmpl' option
    (DEFAULT_OUTTMPL if unset) is then expanded with these fields.

    Returns the sanitized path, or None after reporting an error when a
    ValueError is raised while building the name.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Pad the index to the number of digits of the playlist length
            template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                # Resolution reads WIDTHxHEIGHT, so the known width goes
                # first and the unknown height is the placeholder.
                template_dict['resolution'] = '%dx?' % template_dict['width']

        def sanitize(k, v):
            # The 'id' field is sanitized with the is_id flag set
            return sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id'))

        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None)
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        tmpl = compat_expanduser(outtmpl)
        filename = tmpl % template_dict
        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        return sanitize_path(filename)
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
587
def _match_entry(self, info_dict, incomplete):
    """ Returns None iff the file should be downloaded

    Otherwise the reason for skipping the video is returned.  The checks
    cover the title patterns, the upload date range, the view count
    limits, the age limit and the download archive.  The 'match_filter'
    option is only consulted when incomplete is false.
    """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    # The title can be missing when we're just evaluating the playlist
    if 'title' in info_dict:
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
            return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
            return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

    upload_date = info_dict.get('upload_date', None)
    if upload_date is not None:
        allowed_range = self.params.get('daterange', DateRange())
        if upload_date not in allowed_range:
            return '%s upload date is not in range %s' % (date_from_str(upload_date).isoformat(), allowed_range)

    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

    if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
        return 'Skipping "%s" because it is age restricted' % video_title
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title

    if incomplete:
        return None
    match_filter = self.params.get('match_filter')
    if match_filter is None:
        return None
    # The filter returns a message to skip the video, or None to accept it
    return match_filter(info_dict)
fe7e0c98 629
b6c45014
JMF
630 @staticmethod
631 def add_extra_info(info_dict, extra_info):
632 '''Set the keys from extra_info in info dict if they are missing'''
633 for key, value in extra_info.items():
634 info_dict.setdefault(key, value)
635
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    url -- the URL to extract
    download -- passed on to process_ie_result
    ie_key -- if given, only the extractor registered under this key is
              used instead of the whole extractor list
    extra_info -- a dict containing the extra values to add to each
                  result (None means no extra values)
    process -- if true, the result of process_ie_result is returned,
               otherwise the raw extractor result
    force_generic_extractor -- use the 'Generic' extractor when no
                               ie_key is given

    Returns None when the extractor returned None, when an error was
    reported instead of raised, or when no extractor is suitable.
    '''
    # A fresh dict per call; a shared mutable default would leak state
    # between calls if it were ever modified.
    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without a break: no extractor accepted the URL
        self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 688
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Add the extractor name and key and the webpage URL fields to ie_result.

    Fields already present in ie_result are left untouched.
    """
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
696
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    ie_result  -- dict returned by an extractor; its '_type' key ('video'
                  when absent) selects how it is resolved.
    download   -- forwarded to the video/URL processing calls.
    extra_info -- dict forwarded to the nested processing calls; for
                  'video' results it is handed to add_extra_info.

    Raises Exception for an unrecognized '_type'.
    """

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        # With flat extraction the URL reference is returned unresolved
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # Every non-None field of the embedding result (except the ones
        # describing the reference itself) overrides the extracted info
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        assert new_result.get('_type') != 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist' or result_type == 'multi_video':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items', None)
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(items_spec):
                # 'a-b' segments expand to the inclusive range a..b,
                # any other segment is a single 1-based item number
                for string_segment in items_spec.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = iter_playlistitems(playlistitems_str)

        def select_playlistitems(all_entries):
            # Requested item numbers that fall outside the playlist are
            # skipped instead of raising IndexError
            n_all = len(all_entries)
            return [
                all_entries[i - 1] for i in playlistitems
                if -n_all <= i - 1 < n_all]

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = select_playlistitems(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))
        else:  # iterable
            if playlistitems:
                # Same bounds-checked selection as for plain lists
                entries = select_playlistitems(list(ie_entries))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Entries rejected by the match filters are reported and skipped
            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Copy the extractor-level fields of the list onto each entry
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
860
67134eab
JMF
861 def _build_format_filter(self, filter_spec):
862 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
863
864 OPERATORS = {
865 '<': operator.lt,
866 '<=': operator.le,
867 '>': operator.gt,
868 '>=': operator.ge,
869 '=': operator.eq,
870 '!=': operator.ne,
871 }
67134eab 872 operator_rex = re.compile(r'''(?x)\s*
2ec19e95 873 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
083c9df9
PH
874 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
875 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
67134eab 876 $
083c9df9 877 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
67134eab 878 m = operator_rex.search(filter_spec)
9ddb6925
S
879 if m:
880 try:
881 comparison_value = int(m.group('value'))
882 except ValueError:
883 comparison_value = parse_filesize(m.group('value'))
884 if comparison_value is None:
885 comparison_value = parse_filesize(m.group('value') + 'B')
886 if comparison_value is None:
887 raise ValueError(
888 'Invalid value %r in format specification %r' % (
67134eab 889 m.group('value'), filter_spec))
9ddb6925
S
890 op = OPERATORS[m.group('op')]
891
083c9df9 892 if not m:
9ddb6925
S
893 STR_OPERATORS = {
894 '=': operator.eq,
895 '!=': operator.ne,
896 }
67134eab 897 str_operator_rex = re.compile(r'''(?x)
9ddb6925
S
898 \s*(?P<key>ext|acodec|vcodec|container|protocol)
899 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
900 \s*(?P<value>[a-zA-Z0-9_-]+)
67134eab 901 \s*$
9ddb6925 902 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
67134eab 903 m = str_operator_rex.search(filter_spec)
9ddb6925
S
904 if m:
905 comparison_value = m.group('value')
906 op = STR_OPERATORS[m.group('op')]
083c9df9 907
9ddb6925 908 if not m:
67134eab 909 raise ValueError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
910
911 def _filter(f):
912 actual_value = f.get(m.group('key'))
913 if actual_value is None:
914 return m.group('none_inclusive')
915 return op(actual_value, comparison_value)
67134eab
JMF
916 return _filter
917
def build_format_selector(self, format_spec):
    """Compile a format specification string into a selector function.

    format_spec is tokenized with the Python tokenizer, parsed into a tree
    of FormatSelector tuples and turned into a function that takes the
    list of available format dicts and yields/returns the selected ones.

    Raises SyntaxError (built by syntax_error) for malformed specifications.
    """
    def syntax_error(note, start):
        # Build (not raise) a SyntaxError whose message shows the spec with
        # a caret under column start[1]
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    # Node kinds of the parsed selector tree
    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    def _parse_filter(tokens):
        # Consume tokens up to the closing ']' and return their text joined
        # together; returns None implicitly if no ']' is found
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        # last_line is never reassigned below, so joined tokens carry None as their line
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                # Flush the pending joined name before the filter starts
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                # Accumulate names, numbers and unused operators into one name
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive parser: returns a list of FormatSelector nodes.
        # The inside_* flags tell a nested call which operators belong to
        # its caller; on those it rewinds one token and stops.
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    # A filter with no preceding selector applies to 'best'
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    video_selector = current_selector
                    audio_selector = _parse_format_selection(tokens, inside_merge=True)
                    if not video_selector or not audio_selector:
                        raise syntax_error('"+" must be between two format selectors', start)
                    current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _build_selector_function(selector):
        # Turn a parsed node (or a list of nodes) into a function mapping
        # the available formats to the selected ones
        if isinstance(selector, list):
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(formats):
                # Concatenate the results of every selector in the list
                for f in fs:
                    for format in f(formats):
                        yield format
            return selector_function
        elif selector.type == GROUP:
            selector_function = _build_selector_function(selector.selector)
        elif selector.type == PICKFIRST:
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(formats):
                # Result of the first alternative that selects anything
                for f in fs:
                    picked_formats = list(f(formats))
                    if picked_formats:
                        return picked_formats
                return []
        elif selector.type == SINGLE:
            # Local name; shadows the enclosing format_spec inside this function
            format_spec = selector.selector

            def selector_function(formats):
                formats = list(formats)
                if not formats:
                    return
                if format_spec == 'all':
                    for f in formats:
                        yield f
                elif format_spec in ['best', 'worst', None]:
                    # 'worst' takes the first element, everything else the last
                    # NOTE(review): presumably formats are ordered worst to best - confirm
                    format_idx = 0 if format_spec == 'worst' else -1
                    audiovideo_formats = [
                        f for f in formats
                        if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                    if audiovideo_formats:
                        yield audiovideo_formats[format_idx]
                    # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                    elif (all(f.get('acodec') != 'none' for f in formats) or
                          all(f.get('vcodec') != 'none' for f in formats)):
                        yield formats[format_idx]
                elif format_spec == 'bestaudio':
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[-1]
                elif format_spec == 'worstaudio':
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[0]
                elif format_spec == 'bestvideo':
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[-1]
                elif format_spec == 'worstvideo':
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[0]
                else:
                    # A known extension selects by 'ext', anything else is
                    # treated as a format_id; the last match is used
                    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                    if format_spec in extensions:
                        filter_f = lambda f: f['ext'] == format_spec
                    else:
                        filter_f = lambda f: f['format_id'] == format_spec
                    matches = list(filter(filter_f, formats))
                    if matches:
                        yield matches[-1]
        elif selector.type == MERGE:
            def _merge(formats_info):
                # Combine a (video, audio) pair into a single format dict;
                # returns None after reporting an error if the first
                # format has no video
                format_1, format_2 = [f['format_id'] for f in formats_info]
                # The first format must contain the video and the
                # second the audio
                if formats_info[0].get('vcodec') == 'none':
                    self.report_error('The first format must '
                                      'contain the video, try using '
                                      '"-f %s+%s"' % (format_2, format_1))
                    return
                output_ext = (
                    formats_info[0]['ext']
                    if self.params.get('merge_output_format') is None
                    else self.params['merge_output_format'])
                return {
                    'requested_formats': formats_info,
                    'format': '%s+%s' % (formats_info[0].get('format'),
                                         formats_info[1].get('format')),
                    'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                            formats_info[1].get('format_id')),
                    'width': formats_info[0].get('width'),
                    'height': formats_info[0].get('height'),
                    'resolution': formats_info[0].get('resolution'),
                    'fps': formats_info[0].get('fps'),
                    'vcodec': formats_info[0].get('vcodec'),
                    'vbr': formats_info[0].get('vbr'),
                    'stretched_ratio': formats_info[0].get('stretched_ratio'),
                    'acodec': formats_info[1].get('acodec'),
                    'abr': formats_info[1].get('abr'),
                    'ext': output_ext,
                }
            video_selector, audio_selector = map(_build_selector_function, selector.selector)

            def selector_function(formats):
                # Merge every combination of selected video and audio formats
                formats = list(formats)
                for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(formats):
            # Apply the node's filters first, then its selector function
            for _filter in filters:
                formats = list(filter(_filter, formats))
            return selector_function(formats)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over a token list that can step back one token, which
        # the recursive parser uses to hand an operator back to its caller
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        # Python 2 iterator protocol name
        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 1174
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for info_dict.

    Starts from a copy of std_headers, applies the 'http_headers' of
    info_dict on top and sets 'Cookie' when _calc_cookies returns a
    non-empty value.
    """
    headers = std_headers.copy()
    headers.update(info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header
    return headers
1187
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header the cookie jar adds for info_dict['url']."""
    # A throwaway request lets the cookie jar compute the header for the URL
    probe_request = compat_urllib_request.Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    return probe_request.get_header('Cookie')
e5660ee6 1192
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Normalize a 'video' result, select its formats and process them.

    Fills in derived fields of info_dict in place (thumbnails, display_id,
    upload_date, subtitle and format defaults, per-format http_headers),
    selects the formats matching the 'format' parameter and, if download
    is true, passes each selected format to process_info.

    Returns the updated info_dict, or None when one of the list* options
    (listsubtitles, listformats, list_thumbnails) only printed a listing.

    Raises ExtractorError when 'id', 'title' or a format 'url' is missing,
    when there are no formats, or when the requested format is unavailable.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        # Missing values get neutral defaults: comparing None with a number
        # or a string raises TypeError on Python 3
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '',
            t.get('url')))
        for i, t in enumerate(thumbnails):
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                t['id'] = '%d' % i

    # The last thumbnail in sort order becomes the default one
    if thumbnails and 'thumbnail' not in info_dict:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    subtitles = info_dict.get('subtitles')
    if subtitles:
        for _, subtitle in subtitles.items():
            for subtitle_format in subtitle:
                if 'ext' not in subtitle_format:
                    subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return
    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles,
        info_dict.get('automatic_captions'))

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if 'url' not in fmt:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        format_id = fmt['format_id']
        if format_id not in formats_dict:
            formats_dict[format_id] = []
        formats_dict[format_id].append(fmt)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, fmt in enumerate(ambiguous_formats):
                fmt['format_id'] = '%s-%d' % (format_id, i)

    for i, fmt in enumerate(formats):
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        # No explicit format requested: build the default specification
        req_format_list = []
        if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                info_dict['extractor'] in ['youtube', 'ted'] and
                not info_dict.get('is_live')):
            merger = FFmpegMergerPP(self)
            if merger.available and merger.can_merge():
                req_format_list.append('bestvideo+bestaudio')
        req_format_list.append('best')
        req_format = '/'.join(req_format_list)
    format_selector = self.build_format_selector(req_format)
    formats_to_download = list(format_selector(formats))
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
1340
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Pick, per requested language, the subtitle entry to download.

    Regular subtitles are considered when 'writesubtitles' is set and
    automatic captions when 'writeautomaticsub' is set; for a language
    present in both, the regular subtitles win.

    Returns a dict mapping language to the chosen subtitle dict, or None
    if no subtitles were requested or none are available.
    """
    params = self.params

    candidates = {}
    if normal_subtitles and params.get('writesubtitles'):
        candidates.update(normal_subtitles)
    if automatic_captions and params.get('writeautomaticsub'):
        for language, captions in automatic_captions.items():
            if language not in candidates:
                candidates[language] = captions

    any_requested = params.get('writesubtitles') or params.get('writeautomaticsub')
    if not any_requested or not candidates:
        return None

    if params.get('allsubtitles', False):
        wanted_langs = candidates.keys()
    elif params.get('subtitleslangs', False):
        wanted_langs = params.get('subtitleslangs')
    elif 'en' in candidates:
        wanted_langs = ['en']
    else:
        wanted_langs = [list(candidates.keys())[0]]

    formats_query = params.get('subtitlesformat', 'best')
    preferred_exts = formats_query.split('/') if formats_query else []

    selected = {}
    for language in wanted_langs:
        lang_formats = candidates.get(language)
        if lang_formats is None:
            self.report_warning('%s subtitles not available for %s' % (language, video_id))
            continue

        found = False
        for wanted_ext in preferred_exts:
            if wanted_ext == 'best':
                choice = lang_formats[-1]
                found = True
                break
            same_ext = [sub for sub in lang_formats if sub['ext'] == wanted_ext]
            if same_ext:
                choice = same_ext[-1]
                found = True
                break
        if not found:
            # Nothing in the preference list matched: fall back to the
            # last available entry and tell the user
            choice = lang_formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, language, choice['ext']))
        selected[language] = choice
    return selected
1389
8222d8de
JMF
1390 def process_info(self, info_dict):
1391 """Process a single resolved IE result."""
1392
1393 assert info_dict.get('_type', 'video') == 'video'
fd288278
PH
1394
1395 max_downloads = self.params.get('max_downloads')
1396 if max_downloads is not None:
1397 if self._num_downloads >= int(max_downloads):
1398 raise MaxDownloadsReached()
8222d8de
JMF
1399
1400 info_dict['fulltitle'] = info_dict['title']
1401 if len(info_dict['title']) > 200:
6febd1c1 1402 info_dict['title'] = info_dict['title'][:197] + '...'
8222d8de 1403
11b85ce6 1404 if 'format' not in info_dict:
8222d8de
JMF
1405 info_dict['format'] = info_dict['ext']
1406
442c37b7 1407 reason = self._match_entry(info_dict, incomplete=False)
8222d8de 1408 if reason is not None:
6febd1c1 1409 self.to_screen('[download] ' + reason)
8222d8de
JMF
1410 return
1411
fd288278 1412 self._num_downloads += 1
8222d8de 1413
e72c7e41 1414 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
8222d8de
JMF
1415
1416 # Forced printings
1417 if self.params.get('forcetitle', False):
0783b09b 1418 self.to_stdout(info_dict['fulltitle'])
8222d8de 1419 if self.params.get('forceid', False):
0783b09b 1420 self.to_stdout(info_dict['id'])
8222d8de 1421 if self.params.get('forceurl', False):
16ae61f6 1422 if info_dict.get('requested_formats') is not None:
1423 for f in info_dict['requested_formats']:
1424 self.to_stdout(f['url'] + f.get('play_path', ''))
1425 else:
1426 # For RTMP URLs, also include the playpath
1427 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
216d71d0 1428 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
0783b09b 1429 self.to_stdout(info_dict['thumbnail'])
216d71d0 1430 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
0783b09b 1431 self.to_stdout(info_dict['description'])
8222d8de 1432 if self.params.get('forcefilename', False) and filename is not None:
0783b09b 1433 self.to_stdout(filename)
525ef922
PH
1434 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1435 self.to_stdout(formatSeconds(info_dict['duration']))
8222d8de 1436 if self.params.get('forceformat', False):
0783b09b 1437 self.to_stdout(info_dict['format'])
9d153818 1438 if self.params.get('forcejson', False):
0783b09b 1439 self.to_stdout(json.dumps(info_dict))
8222d8de
JMF
1440
1441 # Do nothing else if in simulate mode
1442 if self.params.get('simulate', False):
1443 return
1444
1445 if filename is None:
1446 return
1447
1448 try:
e5a11a22 1449 dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
d26e981d 1450 if dn and not os.path.exists(dn):
8222d8de
JMF
1451 os.makedirs(dn)
1452 except (OSError, IOError) as err:
6febd1c1 1453 self.report_error('unable to create directory ' + compat_str(err))
8222d8de
JMF
1454 return
1455
1456 if self.params.get('writedescription', False):
2699da80 1457 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
7b6fefc9 1458 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
6febd1c1 1459 self.to_screen('[info] Video description is already present')
f00fd51d
JMF
1460 elif info_dict.get('description') is None:
1461 self.report_warning('There\'s no description to write.')
7b6fefc9
PH
1462 else:
1463 try:
6febd1c1 1464 self.to_screen('[info] Writing video description to: ' + descfn)
7b6fefc9
PH
1465 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1466 descfile.write(info_dict['description'])
7b6fefc9 1467 except (OSError, IOError):
6febd1c1 1468 self.report_error('Cannot write description file ' + descfn)
7b6fefc9 1469 return
8222d8de 1470
1fb07d10 1471 if self.params.get('writeannotations', False):
98727e12 1472 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
7b6fefc9 1473 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
6febd1c1 1474 self.to_screen('[info] Video annotations are already present')
7b6fefc9
PH
1475 else:
1476 try:
6febd1c1 1477 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
1478 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1479 annofile.write(info_dict['annotations'])
1480 except (KeyError, TypeError):
6febd1c1 1481 self.report_warning('There are no annotations to write.')
7b6fefc9 1482 except (OSError, IOError):
6febd1c1 1483 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 1484 return
1fb07d10 1485
c4a91be7 1486 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 1487 self.params.get('writeautomaticsub')])
c4a91be7 1488
c84dd8a9 1489 if subtitles_are_requested and info_dict.get('requested_subtitles'):
8222d8de
JMF
1490 # subtitles download errors are already managed as troubles in relevant IE
1491 # that way it will silently go on when used with unsupporting IE
c84dd8a9 1492 subtitles = info_dict['requested_subtitles']
0f2c0d33 1493 ie = self.get_info_extractor(info_dict['extractor_key'])
a504ced0
JMF
1494 for sub_lang, sub_info in subtitles.items():
1495 sub_format = sub_info['ext']
1496 if sub_info.get('data') is not None:
1497 sub_data = sub_info['data']
1498 else:
1499 try:
0f2c0d33
JMF
1500 sub_data = ie._download_webpage(
1501 sub_info['url'], info_dict['id'], note=False)
1502 except ExtractorError as err:
a504ced0 1503 self.report_warning('Unable to download subtitle for "%s": %s' %
0f2c0d33 1504 (sub_lang, compat_str(err.cause)))
a504ced0 1505 continue
8222d8de 1506 try:
d4051a8e 1507 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
7b6fefc9 1508 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
6febd1c1 1509 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
7b6fefc9 1510 else:
6febd1c1 1511 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
7b6fefc9 1512 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
a504ced0 1513 subfile.write(sub_data)
8222d8de 1514 except (OSError, IOError):
e4db1951 1515 self.report_error('Cannot write subtitles file ' + sub_filename)
8222d8de
JMF
1516 return
1517
8222d8de 1518 if self.params.get('writeinfojson', False):
b29e0000 1519 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
7b6fefc9 1520 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
6febd1c1 1521 self.to_screen('[info] Video description metadata is already present')
7b6fefc9 1522 else:
6febd1c1 1523 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
7b6fefc9 1524 try:
cb202fd2 1525 write_json_file(self.filter_requested_info(info_dict), infofn)
7b6fefc9 1526 except (OSError, IOError):
6febd1c1 1527 self.report_error('Cannot write metadata to JSON file ' + infofn)
7b6fefc9 1528 return
8222d8de 1529
ec82d85a 1530 self._write_thumbnails(info_dict, filename)
8222d8de
JMF
1531
1532 if not self.params.get('skip_download', False):
4340deca
P
1533 try:
1534 def dl(name, info):
a055469f 1535 fd = get_suitable_downloader(info, self.params)(self, self.params)
4340deca
P
1536 for ph in self._progress_hooks:
1537 fd.add_progress_hook(ph)
1538 if self.params.get('verbose'):
1539 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1540 return fd.download(name, info)
ee69b99a 1541
4340deca
P
1542 if info_dict.get('requested_formats') is not None:
1543 downloaded = []
1544 success = True
d47aeb22 1545 merger = FFmpegMergerPP(self)
f740fae2 1546 if not merger.available:
4340deca
P
1547 postprocessors = []
1548 self.report_warning('You have requested multiple '
1549 'formats but ffmpeg or avconv are not installed.'
4a5a898a 1550 ' The formats won\'t be merged.')
6350728b 1551 else:
4340deca 1552 postprocessors = [merger]
81cd954a
S
1553
def compatible_formats(formats):
    """Tell whether a (video, audio) pair of formats share a container family.

    `formats` must hold exactly two format dicts, video first. Only the
    'ext' value of each one is inspected; a missing or empty 'ext' on
    either side yields False.
    """
    video, audio = formats
    # Check extension
    video_ext, audio_ext = video.get('ext'), audio.get('ext')
    if video_ext and audio_ext:
        # Each group has to be a real tuple: a bare ('webm') is just the
        # string 'webm', which would turn `in` into a substring test.
        COMPATIBLE_EXTS = (
            ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
            ('webm',),
        )
        for exts in COMPATIBLE_EXTS:
            if video_ext in exts and audio_ext in exts:
                return True
    # TODO: Check acodec/vcodec
    return False
1568
38c6902b
S
1569 filename_real_ext = os.path.splitext(filename)[1][1:]
1570 filename_wo_ext = (
1571 os.path.splitext(filename)[0]
1572 if filename_real_ext == info_dict['ext']
1573 else filename)
81cd954a 1574 requested_formats = info_dict['requested_formats']
c0dea0a7 1575 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
38c6902b 1576 info_dict['ext'] = 'mkv'
4a5a898a
S
1577 self.report_warning(
1578 'Requested formats are incompatible for merge and will be merged into mkv.')
38c6902b
S
1579 # Ensure filename always has a correct extension for successful merge
1580 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
5b5fbc08
JMF
1581 if os.path.exists(encodeFilename(filename)):
1582 self.to_screen(
1583 '[download] %s has already been downloaded and '
1584 'merged' % filename)
1585 else:
81cd954a 1586 for f in requested_formats:
5b5fbc08
JMF
1587 new_info = dict(info_dict)
1588 new_info.update(f)
1589 fname = self.prepare_filename(new_info)
666a9a2b 1590 fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
5b5fbc08
JMF
1591 downloaded.append(fname)
1592 partial_success = dl(fname, new_info)
1593 success = success and partial_success
1594 info_dict['__postprocessors'] = postprocessors
1595 info_dict['__files_to_merge'] = downloaded
4340deca
P
1596 else:
1597 # Just a single file
1598 success = dl(filename, info_dict)
1599 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1600 self.report_error('unable to download video data: %s' % str(err))
1601 return
1602 except (OSError, IOError) as err:
1603 raise UnavailableVideoError(err)
1604 except (ContentTooShortError, ) as err:
1605 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1606 return
8222d8de
JMF
1607
1608 if success:
6271f1ca 1609 # Fixup content
62cd676c
PH
1610 fixup_policy = self.params.get('fixup')
1611 if fixup_policy is None:
1612 fixup_policy = 'detect_or_warn'
1613
6271f1ca
PH
1614 stretched_ratio = info_dict.get('stretched_ratio')
1615 if stretched_ratio is not None and stretched_ratio != 1:
6271f1ca
PH
1616 if fixup_policy == 'warn':
1617 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1618 info_dict['id'], stretched_ratio))
1619 elif fixup_policy == 'detect_or_warn':
1620 stretched_pp = FFmpegFixupStretchedPP(self)
1621 if stretched_pp.available:
1622 info_dict.setdefault('__postprocessors', [])
1623 info_dict['__postprocessors'].append(stretched_pp)
1624 else:
1625 self.report_warning(
1626 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1627 info_dict['id'], stretched_ratio))
1628 else:
62cd676c
PH
1629 assert fixup_policy in ('ignore', 'never')
1630
1631 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1632 if fixup_policy == 'warn':
1633 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1634 info_dict['id']))
1635 elif fixup_policy == 'detect_or_warn':
1636 fixup_pp = FFmpegFixupM4aPP(self)
1637 if fixup_pp.available:
1638 info_dict.setdefault('__postprocessors', [])
1639 info_dict['__postprocessors'].append(fixup_pp)
1640 else:
1641 self.report_warning(
1642 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1643 info_dict['id']))
1644 else:
1645 assert fixup_policy in ('ignore', 'never')
6271f1ca 1646
8222d8de
JMF
1647 try:
1648 self.post_process(filename, info_dict)
1649 except (PostProcessingError) as err:
6febd1c1 1650 self.report_error('postprocessing: %s' % str(err))
8222d8de 1651 return
cd58dc3e 1652 self.record_download_archive(info_dict)
8222d8de
JMF
1653
def download(self, url_list):
    """Download a given list of URLs."""
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    # Several URLs cannot share one fixed output name, unless at most a
    # single download was requested anyway.
    if len(url_list) > 1:
        if '%' not in outtmpl and self.params.get('max_downloads') != 1:
            raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            result = self.extract_info(
                url,
                force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

        # Only reached when extraction raised nothing
        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
1677
def download_with_info_file(self, info_filename):
    """Process a previously saved info JSON file and download from it.

    If processing the stored info raises DownloadError and the info has
    a 'webpage_url', the download is retried from that URL; otherwise
    the error is re-raised. Returns the download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_json = '\n'.join(lines)
        info = self.filter_requested_info(json.loads(raw_json))

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 1694
cb202fd2
S
@staticmethod
def filter_requested_info(info_dict):
    """Return a shallow copy of info_dict without the 'requested_formats'
    and 'requested_subtitles' entries."""
    dropped_keys = ('requested_formats', 'requested_subtitles')
    filtered = {}
    for key, value in info_dict.items():
        if key in dropped_keys:
            continue
        filtered[key] = value
    return filtered
1700
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file."""
    info = dict(ie_info)
    info['filepath'] = filename

    # Postprocessors attached to this download run before the ones
    # registered on the instance.
    per_download_pps = ie_info.get('__postprocessors')
    chain = list(per_download_pps) if per_download_pps is not None else []
    chain.extend(self._pps)

    for pp in chain:
        obsolete_files = []
        try:
            obsolete_files, info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
        if not obsolete_files or self.params.get('keepvideo', False):
            continue
        for old_filename in obsolete_files:
            self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
            try:
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
c1c9a79c 1722
5db07df6
PH
1723 def _make_archive_id(self, info_dict):
1724 # Future-proof against any change in case
1725 # and backwards compatibility with prior versions
d31209a1 1726 extractor = info_dict.get('extractor_key')
7012b23c
PH
1727 if extractor is None:
1728 if 'id' in info_dict:
1729 extractor = info_dict.get('ie_key') # key in a playlist
1730 if extractor is None:
5db07df6 1731 return None # Incomplete video information
6febd1c1 1732 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
1733
def in_download_archive(self, info_dict):
    """Tell whether the video described by info_dict is already listed
    in the file named by the 'download_archive' parameter.

    Returns False when no archive is configured, when the info is too
    incomplete to build an archive id, or when the archive file does
    not exist yet.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    if archive_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(line.strip() == archive_id for line in archive_file):
                return True
    except IOError as ioe:
        # A missing archive file just means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
1752
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the download archive file.

    Does nothing when the 'download_archive' parameter is not set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        archive_id = self._make_archive_id(info_dict)
        assert archive_id
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + '\n')
dd82ffea 1761
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short human-readable resolution for a format dict.

    Precedence: 'audio only' when vcodec is 'none', then an explicit
    'resolution' value, then WIDTHxHEIGHT / HEIGHTp / WIDTHx? built
    from whichever of 'width' and 'height' are known, and finally
    `default` when nothing is known.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width, height = format.get('width'), format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        # Width goes first in WIDTHxHEIGHT; the unknown height is the '?'
        return '%sx?' % width
    return default
1778
c57f7757
PH
1779 def _format_note(self, fdict):
1780 res = ''
1781 if fdict.get('ext') in ['f4f', 'f4m']:
1782 res += '(unsupported) '
1783 if fdict.get('format_note') is not None:
1784 res += fdict['format_note'] + ' '
1785 if fdict.get('tbr') is not None:
1786 res += '%4dk ' % fdict['tbr']
1787 if fdict.get('container') is not None:
1788 if res:
1789 res += ', '
1790 res += '%s container' % fdict['container']
1791 if (fdict.get('vcodec') is not None and
1792 fdict.get('vcodec') != 'none'):
1793 if res:
1794 res += ', '
1795 res += fdict['vcodec']
91c7271a 1796 if fdict.get('vbr') is not None:
c57f7757
PH
1797 res += '@'
1798 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1799 res += 'video@'
1800 if fdict.get('vbr') is not None:
1801 res += '%4dk' % fdict['vbr']
fbb21cf5
PH
1802 if fdict.get('fps') is not None:
1803 res += ', %sfps' % fdict['fps']
c57f7757
PH
1804 if fdict.get('acodec') is not None:
1805 if res:
1806 res += ', '
1807 if fdict['acodec'] == 'none':
1808 res += 'video only'
1809 else:
1810 res += '%-5s' % fdict['acodec']
1811 elif fdict.get('abr') is not None:
1812 if res:
1813 res += ', '
1814 res += 'audio'
1815 if fdict.get('abr') is not None:
1816 res += '@%3dk' % fdict['abr']
1817 if fdict.get('asr') is not None:
1818 res += ' (%5dHz)' % fdict['asr']
1819 if fdict.get('filesize') is not None:
1820 if res:
1821 res += ', '
1822 res += format_bytes(fdict['filesize'])
9732d77e
PH
1823 elif fdict.get('filesize_approx') is not None:
1824 if res:
1825 res += ', '
1826 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 1827 return res
91c7271a 1828
def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    Uses info_dict['formats'], or info_dict itself as the only format
    when that key is absent. Formats whose 'preference' is below -1000
    are left out. When more than one format exists, the last listed
    row is tagged '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    # The preference filter may have removed every row; only tag a row
    # that actually exists instead of failing on table[-1].
    if len(formats) > 1 and table:
        table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
cfb56d1a
PH
1842
def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails known for a video.

    Falls back to the single 'thumbnail' URL when the 'thumbnails' list
    is missing or empty, and reports when neither is available.
    """
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        single_url = info_dict.get('thumbnail')
        if not single_url:
            self.to_screen(
                '[info] No thumbnails present for %s' % info_dict['id'])
            return
        thumbnails = [{'id': '0', 'url': single_url}]

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    rows = [
        [t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']]
        for t in thumbnails]
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
dca08720 1859
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the languages and formats of the given subtitles mapping.

    `subtitles` maps a language to a list of format dicts, each with an
    'ext' key; `name` is the label used in the printed messages.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen(
        'Available %s for %s:' % (name, video_id))
    rows = []
    for lang, formats in subtitles.items():
        # Formats are shown in reverse of their stored order
        exts = [f['ext'] for f in reversed(formats)]
        rows.append([lang, ', '.join(exts)])
    self.to_screen(render_table(['Language', 'formats'], rows))
a504ced0 1870
dca08720
PH
def urlopen(self, req):
    """ Start an HTTP download """
    # Every request goes through the shared opener with the configured
    # socket timeout.
    opener = self._opener
    return opener.open(req, timeout=self._socket_timeout)
dca08720
PH
1874
def print_debug_header(self):
    """Write diagnostic information when the 'verbose' parameter is set.

    Reports encodings, the program version, the git HEAD when it can be
    determined, the Python version, versions of external executables
    and the proxy map. With 'call_home' set it additionally fetches the
    public IP address and the latest released version.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encodings = (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        stdout_encoding,
        self.get_encoding())
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % encodings)
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        git = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=module_dir)
        out, err = git.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Best effort only: the git revision is optional information
        try:
            sys.exc_clear()
        except Exception:
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    found = [
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v]
    exe_str = ', '.join(found) or 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if not self.params.get('call_home', False):
        return
    ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
    self._write_string('[debug] Public IP address: %s\n' % ipaddr)
    latest_version = self.urlopen(
        'https://yt-dl.org/latest/version').read().decode('utf-8')
    if version_tuple(latest_version) > version_tuple(__version__):
        self.report_warning(
            'You are using an outdated version (newest version: %s)! '
            'See https://yt-dl.org/update if you need help updating.' %
            latest_version)
1939
def _setup_opener(self):
    """Create the URL opener and cookie jar used for all requests.

    Reads the 'socket_timeout', 'cookiefile', 'proxy' and
    'debug_printtraffic' parameters and stores the results in
    self._socket_timeout, self.cookiejar and self._opener.
    """
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = float(timeout_val) if timeout_val is not None else 600

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        # Only load cookies from a file that is actually readable
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An explicitly empty proxy option means no proxy at all
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    data_handler = compat_urllib_request_DataHandler()
    handlers = (proxy_handler, https_handler, cookie_processor, ydlh, data_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
1980
def encode(self, s):
    """Encode text with the encoding from get_encoding().

    Byte strings are returned unchanged. A UnicodeEncodeError is
    re-raised with a hint about the encoding configuration appended to
    its reason.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded
1990
def get_encoding(self):
    """Return the 'encoding' parameter, or the value of
    preferredencoding() when that parameter is not set."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a
PH
1996
1997 def _write_thumbnails(self, info_dict, filename):
1998 if self.params.get('writethumbnail', False):
1999 thumbnails = info_dict.get('thumbnails')
2000 if thumbnails:
2001 thumbnails = [thumbnails[-1]]
2002 elif self.params.get('write_all_thumbnails', False):
2003 thumbnails = info_dict.get('thumbnails')
2004 else:
2005 return
2006
2007 if not thumbnails:
2008 # No thumbnails present, so return immediately
2009 return
2010
2011 for t in thumbnails:
2012 thumb_ext = determine_ext(t['url'], 'jpg')
2013 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2014 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
82245a6d 2015 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
ec82d85a
PH
2016
2017 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2018 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2019 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2020 else:
2021 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2022 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2023 try:
2024 uf = self.urlopen(t['url'])
d3d89c32 2025 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a
PH
2026 shutil.copyfileobj(uf, thumbf)
2027 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2028 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2029 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2030 self.report_warning('Unable to download thumbnail "%s": %s' %
2031 (t['url'], compat_str(err)))