]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
Properly convert errors to strings
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
9d2ecdbc 8import datetime
c1c9a79c 9import errno
31bd3925 10import fileinput
8222d8de 11import io
b82f815f 12import itertools
8694c600 13import json
62fec3b2 14import locale
083c9df9 15import operator
8222d8de 16import os
dca08720 17import platform
8222d8de
JMF
18import re
19import shutil
dca08720 20import subprocess
8222d8de
JMF
21import socket
22import sys
23import time
67134eab 24import tokenize
8222d8de
JMF
25import traceback
26
1e5b9a95
PH
27if os.name == 'nt':
28 import ctypes
29
8c25f81b 30from .compat import (
82d8a8b6 31 compat_basestring,
dca08720 32 compat_cookiejar,
4644ac55 33 compat_expanduser,
003c69a8 34 compat_get_terminal_size,
ce02ed60 35 compat_http_client,
4f026faf 36 compat_kwargs,
ce02ed60 37 compat_str,
67134eab 38 compat_tokenize_tokenize,
ce02ed60
PH
39 compat_urllib_error,
40 compat_urllib_request,
8b172c2e 41 compat_urllib_request_DataHandler,
8c25f81b
PH
42)
43from .utils import (
ce02ed60
PH
44 ContentTooShortError,
45 date_from_str,
46 DateRange,
acd69589 47 DEFAULT_OUTTMPL,
ce02ed60
PH
48 determine_ext,
49 DownloadError,
50 encodeFilename,
7f8b2714 51 error_to_str,
ce02ed60 52 ExtractorError,
02dbf93f 53 format_bytes,
525ef922 54 formatSeconds,
ce02ed60 55 locked_file,
dca08720 56 make_HTTPS_handler,
ce02ed60 57 MaxDownloadsReached,
b7ab0590 58 PagedList,
083c9df9 59 parse_filesize,
91410c9b 60 PerRequestProxyHandler,
ce02ed60 61 PostProcessingError,
dca08720 62 platform_name,
ce02ed60 63 preferredencoding,
cfb56d1a 64 render_table,
ce02ed60
PH
65 SameFileError,
66 sanitize_filename,
1bb5c511 67 sanitize_path,
67dda517 68 sanitized_Request,
e5660ee6 69 std_headers,
ce02ed60 70 subtitles_filename,
ce02ed60 71 UnavailableVideoError,
29eb5174 72 url_basename,
58b1f00d 73 version_tuple,
ce02ed60
PH
74 write_json_file,
75 write_string,
6a3f4c3f 76 YoutubeDLCookieProcessor,
dca08720 77 YoutubeDLHandler,
6350728b 78 prepend_extension,
b29e0000 79 replace_extension,
7d4111ed 80 args_to_str,
05900629 81 age_restricted,
ce02ed60 82)
a0e07d31 83from .cache import Cache
023fa8c4 84from .extractor import get_info_extractor, gen_extractors
3bc2ddcc 85from .downloader import get_suitable_downloader
4c83c967 86from .downloader.rtmp import rtmpdump_version
4f026faf 87from .postprocessor import (
62cd676c 88 FFmpegFixupM4aPP,
6271f1ca 89 FFmpegFixupStretchedPP,
4f026faf
PH
90 FFmpegMergerPP,
91 FFmpegPostProcessor,
92 get_postprocessor,
93)
dca08720 94from .version import __version__
8222d8de
JMF
95
96
97class YoutubeDL(object):
98 """YoutubeDL class.
99
100 YoutubeDL objects are the ones responsible for downloading the
101 actual video file and writing it to disk if the user has requested
102 it, among some other tasks. In most cases there should be one per
103 program. As, given a video URL, the downloader doesn't know how to
104 extract all the needed information, task that InfoExtractors do, it
105 has to pass the URL to one of them.
106
107 For this, YoutubeDL objects have a method that allows
108 InfoExtractors to be registered in a given order. When it is passed
109 a URL, the YoutubeDL object hands it to the first InfoExtractor it
110 finds that reports being able to handle it. The InfoExtractor extracts
111 all the information about the video or videos the URL refers to, and
112 YoutubeDL processes the extracted information, possibly using a File
113 Downloader to download the video.
114
115 YoutubeDL objects accept a lot of parameters. In order not to saturate
116 the object constructor with arguments, it receives a dictionary of
117 options instead. These options are available through the params
118 attribute for the InfoExtractors to use. The YoutubeDL also
119 registers itself as the downloader in charge for the InfoExtractors
120 that are added to it, so this is a "mutual registration".
121
122 Available options:
123
124 username: Username for authentication purposes.
125 password: Password for authentication purposes.
180940e0 126 videopassword: Password for accessing a video.
8222d8de
JMF
127 usenetrc: Use netrc for authentication instead.
128 verbose: Print additional info to stdout.
129 quiet: Do not print messages to stdout.
ad8915b7 130 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
131 forceurl: Force printing final URL.
132 forcetitle: Force printing title.
133 forceid: Force printing ID.
134 forcethumbnail: Force printing thumbnail URL.
135 forcedescription: Force printing description.
136 forcefilename: Force printing final filename.
525ef922 137 forceduration: Force printing duration.
8694c600 138 forcejson: Force printing info_dict as JSON.
63e0be34
PH
139 dump_single_json: Force printing the info_dict of the whole playlist
140 (or video) as a single JSON line.
8222d8de 141 simulate: Do not download the video files.
d8600787 142 format: Video format code. See options.py for more information.
8222d8de
JMF
143 outtmpl: Template for output names.
144 restrictfilenames: Do not allow "&" and spaces in file names
145 ignoreerrors: Do not stop on download errors.
d22dec74 146 force_generic_extractor: Force downloader to use the generic extractor
8222d8de
JMF
147 nooverwrites: Prevent overwriting files.
148 playliststart: Playlist item to start at.
149 playlistend: Playlist item to end at.
c14e88f0 150 playlist_items: Specific indices of playlist to download.
ff815fe6 151 playlistreverse: Download playlist items in reverse order.
8222d8de
JMF
152 matchtitle: Download only matching titles.
153 rejecttitle: Reject downloads for matching titles.
8bf9319e 154 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
155 logtostderr: Log messages to stderr instead of stdout.
156 writedescription: Write the video description to a .description file
157 writeinfojson: Write the video description to a .info.json file
1fb07d10 158 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 159 writethumbnail: Write the thumbnail image to a file
ec82d85a 160 write_all_thumbnails: Write all thumbnail formats to files
8222d8de 161 writesubtitles: Write the video subtitles to a file
741dd8ea 162 writeautomaticsub: Write the automatically generated subtitles to a file
8222d8de 163 allsubtitles: Downloads all the subtitles of the video
0b7f3118 164 (requires writesubtitles or writeautomaticsub)
8222d8de 165 listsubtitles: Lists all available subtitles for the video
a504ced0 166 subtitlesformat: The format code for subtitles
aa6a10c4 167 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
168 keepvideo: Keep the video file after post-processing
169 daterange: A DateRange object, download only if the upload_date is in the range.
170 skip_download: Skip the actual download of the video file
c35f9e72 171 cachedir: Location of the cache files in the filesystem.
a0e07d31 172 False to disable filesystem cache.
47192f92 173 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
174 age_limit: An integer representing the user's age in years.
175 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
176 min_views: An integer representing the minimum view count the video
177 must have in order to not be skipped.
178 Videos without view count information are always
179 downloaded. None for no limit.
180 max_views: An integer representing the maximum view count.
181 Videos that are more popular than that are not
182 downloaded.
183 Videos without view count information are always
184 downloaded. None for no limit.
185 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
186 Videos already present in the file are not downloaded
187 again.
dca08720 188 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 189 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
190 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
191 At the moment, this is only supported by YouTube.
a1ee09e8 192 proxy: URL of the proxy server to use
91410c9b
PH
193 cn_verification_proxy: URL of the proxy to use for IP address verification
194 on Chinese sites. (Experimental)
e344693b 195 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
196 bidi_workaround: Work around buggy terminals without bidirectional text
197 support, using fribidi
a0ddb8a2 198 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 199 include_ads: Download ads as well
04b4d394
PH
200 default_search: Prepend this string if an input url is not valid.
201 'auto' for elaborate guessing
62fec3b2 202 encoding: Use this encoding instead of the system-specified.
e8ee972c 203 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
204 Pass in 'in_playlist' to only show this behavior for
205 playlist items.
4f026faf 206 postprocessors: A list of dictionaries, each with an entry
71b640cc
PH
207 * key: The name of the postprocessor. See
208 youtube_dl/postprocessor/__init__.py for a list.
4f026faf
PH
209 as well as any further keyword arguments for the
210 postprocessor.
71b640cc
PH
211 progress_hooks: A list of functions that get called on download
212 progress, with a dictionary with the entries
5cda4eda 213 * status: One of "downloading", "error", or "finished".
ee69b99a 214 Check this first and ignore unknown values.
71b640cc 215
5cda4eda 216 If status is one of "downloading", or "finished", the
ee69b99a
PH
217 following properties may also be present:
218 * filename: The final filename (always present)
5cda4eda 219 * tmpfilename: The filename we're currently writing to
71b640cc
PH
220 * downloaded_bytes: Bytes on disk
221 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
222 * total_bytes_estimate: Guess of the eventual file size,
223 None if unavailable.
224 * elapsed: The number of seconds since download started.
71b640cc
PH
225 * eta: The estimated time in seconds, None if unknown
226 * speed: The download speed in bytes/second, None if
227 unknown
5cda4eda
PH
228 * fragment_index: The counter of the currently
229 downloaded video fragment.
230 * fragment_count: The number of fragments (= individual
231 files that will be merged)
71b640cc
PH
232
233 Progress hooks are guaranteed to be called at least once
234 (with status "finished") if the download is successful.
45598f15 235 merge_output_format: Extension to use when merging formats.
6271f1ca
PH
236 fixup: Automatically correct known faults of the file.
237 One of:
238 - "never": do nothing
239 - "warn": only emit a warning
240 - "detect_or_warn": check whether we can do anything
62cd676c 241 about it, warn otherwise (default)
be4a824d 242 source_address: (Experimental) Client-side IP address to bind to.
8bfa7545
PH
243 call_home: Boolean, true iff we are allowed to contact the
244 youtube-dl servers for debugging.
5f0d813d 245 sleep_interval: Number of seconds to sleep before each download.
cfb56d1a
PH
246 listformats: Print an overview of available video formats and exit.
247 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
248 match_filter: A function that gets called with the info_dict of
249 every video.
250 If it returns a message, the video is ignored.
251 If it returns None, the video is downloaded.
252 match_filter_func in utils.py is one example for this.
7e5db8c9 253 no_color: Do not emit color codes in output.
71b640cc 254
85729c51
PH
255 The following options determine which downloader is picked:
256 external_downloader: Executable of the external downloader to call.
257 None or unset for standard (built-in) downloader.
258 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
fe7e0c98 259
8222d8de 260 The following parameters are not used by YoutubeDL itself, they are used by
c75f0b36 261 the downloader (see youtube_dl/downloader/common.py):
8222d8de 262 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 263 noresizebuffer, retries, continuedl, noprogress, consoletitle,
c75f0b36 264 xattr_set_filesize, external_downloader_args.
76b1bd67
JMF
265
266 The following options are used by the post processors:
267 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
268 otherwise prefer avconv.
f72b0a60
S
269 postprocessor_args: A list of additional command-line arguments for the
270 postprocessor.
8222d8de
JMF
271 """
272
273 params = None
274 _ies = []
275 _pps = []
276 _download_retcode = None
277 _num_downloads = None
278 _screen_file = None
279
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring
    (None is treated as an empty dictionary).
    If auto_init is true, the debug header is printed and the default
    info extractors are registered.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Indexing with the boolean selects stderr when 'logtostderr' is set
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            # The filter process writes to the slave end; its output is
            # read back from the master end (see _output_channel below).
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            try:
                # Try bidiv first, fall back to fribidi if it cannot be started
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # errno 2 is ENOENT (no such file or directory): the executable
            # is missing, which is only worth a warning.
            if ose.errno == 2:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
            not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if isinstance(params.get('outtmpl'), bytes):
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    # Instantiate the postprocessors requested through the 'postprocessors'
    # option; every entry except 'key' is passed on as a keyword argument.
    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        # Copy so that deleting 'key' does not modify the caller's dictionary
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)
356
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short YouTube ID starting with a dash.

    Every element of argv that consists of a dash followed by exactly ten
    ID characters is collected; if there is at least one, a warning is
    reported that shows the command line with those arguments moved behind
    a '--' separator.
    """
    short_id_pattern = r'^-[0-9A-Za-z_-]{10}$'
    suspicious = []
    remaining = []
    for arg in argv:
        if re.match(short_id_pattern, arg):
            suspicious.append(arg)
        else:
            remaining.append(arg)

    if not suspicious:
        return

    correct_argv = ['youtube-dl'] + remaining + ['--'] + suspicious
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
372
8222d8de
JMF
def add_info_extractor(self, ie):
    """Register an InfoExtractor instance with this downloader.

    The extractor is appended to the extractor list, indexed under its
    ie_key() and told that this object is its downloader.
    """
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
378
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the extractor instance registered under ie_key.

    If no instance is registered yet, the class returned by the
    module-level get_info_extractor() is instantiated, added through
    add_info_extractor() and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
390
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every extractor produced by gen_extractors(), in order.
    """
    register = self.add_info_extractor
    for extractor in gen_extractors():
        register(extractor)
397
8222d8de
JMF
def add_post_processor(self, pp):
    """Append a PostProcessor to the chain and make this object its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
402
933605d7
JMF
def add_progress_hook(self, ph):
    """Remember the progress hook ph (currently only used by the file downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 406
def _bidi_workaround(self, message):
    """Pass message through the external bidi filter process, if there is one.

    Without an _output_channel attribute the message is returned
    unchanged. Otherwise the message is written to the filter's stdin and
    as many lines as were sent are read back from the output channel; the
    result is returned without its final newline.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)

    # One line is read back for every line written (message plus the
    # newline appended below).
    expected_lines = message.count('\n') + 1
    filter_stdin = self._output_process.stdin
    filter_stdin.write((message + '\n').encode('utf-8'))
    filter_stdin.flush()

    received = []
    for _ in range(expected_lines):
        received.append(self._output_channel.readline().decode('utf-8'))
    filtered = ''.join(received)
    return filtered[:-len('\n')]
1c088fa8 419
def to_screen(self, message, skip_eol=False):
    """Print message through to_stdout() with the quiet check enabled."""
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
423
def _write_string(self, s, out=None):
    """Write s to out via write_string(), using the 'encoding' option."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
734f90bb 426
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or hand it to the configured logger.

    If a 'logger' option is set, the message goes to logger.debug() and
    nothing is printed. Otherwise the message is printed unless
    check_quiet is set and the 'quiet' option is enabled. A newline is
    appended unless skip_eol is true.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return

    if check_quiet and self.params.get('quiet', False):
        return

    filtered = self._bidi_workaround(message)
    # Indexing with the boolean: newline normally, nothing if skip_eol
    line_end = ('\n', '')[skip_eol]
    self._write_string(filtered + line_end, self._screen_file)
8222d8de
JMF
437
def to_stderr(self, message):
    """Print message to the error file, or hand it to the configured logger.

    message must be a compat_str. If a 'logger' option is set, the message
    goes to logger.error(); otherwise it is written, followed by a
    newline, to the error file.
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    filtered = self._bidi_workaround(message)
    self._write_string(filtered + '\n', self._err_file)
8222d8de 447
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console window title to message if 'consoletitle' is enabled.

    On Windows with a console window the title is set through the
    kernel32 API; otherwise, if TERM is set in the environment, a title
    escape sequence is written to the screen file.
    """
    if not self.params.get('consoletitle', False):
        return

    on_windows_console = (
        os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow())
    if on_windows_console:
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        return

    if 'TERM' in os.environ:
        self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 457
bdde425c
PH
def save_console_title(self):
    """Push the current console title onto the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled and TERM is set in the
    environment.
    """
    wants_title = self.params.get('consoletitle', False)
    if wants_title and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
464
def restore_console_title(self):
    """Pop the console title saved by save_console_title() off the stack.

    Does nothing unless 'consoletitle' is enabled and TERM is set in the
    environment.
    """
    wants_title = self.params.get('consoletitle', False)
    if wants_title and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
471
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    self.save_console_title()
    return self
475
def __exit__(self, *args):
    """Leave the context: restore the console title and save the cookies.

    The cookie jar is only saved when a 'cookiefile' option is set.
    """
    self.restore_console_title()

    cookie_path = self.params.get('cookiefile')
    if cookie_path is None:
        return
    self.cookiejar.save()
bdde425c 481
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr. In verbose mode a traceback
    is printed as well: tb if given, otherwise one built from the
    exception being handled or, outside an except block, from the current
    stack. Unless the 'ignoreerrors' option is set, a DownloadError is
    then raised; otherwise the download return code is set to 1.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            active = sys.exc_info()
            if active[0]:  # .trouble has been called from an except block
                tb = ''
                # Exceptions may carry the exc_info of an underlying error
                if hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*active[1].exc_info))
                tb += compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    current = sys.exc_info()
    if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
        exc_info = current[1].exc_info
    else:
        exc_info = current
    raise DownloadError(message, exc_info)
511
def report_warning(self, message):
    '''
    Report a warning.

    With a 'logger' option the message is passed to logger.warning().
    Otherwise, unless 'no_warnings' is set, it is printed to stderr
    prefixed with 'WARNING:', which is colored when colors are allowed,
    stderr is a tty and the platform is not Windows.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return

    use_color = (
        not self.params.get('no_color') and
        self._err_file.isatty() and
        os.name != 'nt')
    _msg_header = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))
8222d8de
JMF
528
def report_error(self, message, tb=None):
    '''
    Forward the message to trouble(), prefixed with 'ERROR:'.

    The prefix is colored in red when colors are allowed, stderr is a tty
    and the platform is not Windows. tb is passed on to trouble().
    '''
    use_color = (
        not self.params.get('no_color') and
        self._err_file.isatty() and
        os.name != 'nt')
    _msg_header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)
540
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name has already been fully downloaded.

    If printing the name raises UnicodeEncodeError, a generic message
    without the file name is printed instead.
    """
    named_message = '[download] %s has already been downloaded' % file_name
    try:
        self.to_screen(named_message)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
8222d8de 547
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Build the output filename for info_dict from the 'outtmpl' option.

    A copy of info_dict is extended with 'epoch', 'autonumber', a
    zero-padded 'playlist_index' and, if missing, a 'resolution' derived
    from width/height. All values that are not None are sanitized, unknown
    template fields expand to 'NA'. Returns the sanitized path, or None
    (after reporting an error) if the template raises ValueError.
    """
    try:
        fields = dict(info_dict)
        fields['epoch'] = int(time.time())

        number_width = self.params.get('autonumber_size')
        if number_width is None:
            number_width = 5
        number_format = '%0' + str(number_width) + 'd'
        fields['autonumber'] = number_format % self._num_downloads

        if fields.get('playlist_index') is not None:
            # Pad the index to the number of digits of the entry count
            index_width = len(str(fields['n_entries']))
            fields['playlist_index'] = '%0*d' % (index_width, fields['playlist_index'])

        if fields.get('resolution') is None:
            width = fields.get('width')
            height = fields.get('height')
            if width and height:
                fields['resolution'] = '%dx%d' % (width, height)
            elif height:
                fields['resolution'] = '%sp' % height
            elif width:
                fields['resolution'] = '?x%d' % width

        def sanitize(key, value):
            return sanitize_filename(
                compat_str(value),
                restricted=self.params.get('restrictfilenames'),
                is_id=(key == 'id'))

        sanitized = dict(
            (key, sanitize(key, value))
            for key, value in fields.items()
            if value is not None)
        # Fields the template asks for but that are unknown become 'NA'
        sanitized = collections.defaultdict(lambda: 'NA', sanitized)

        template = compat_expanduser(self.params.get('outtmpl', DEFAULT_OUTTMPL))
        filename = template % sanitized
        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        return sanitize_path(filename)
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
590
def _match_entry(self, info_dict, incomplete):
    """Return None iff the video should be downloaded, else the reason to skip it.

    Checks, in this order: the 'matchtitle' and 'rejecttitle' patterns,
    the 'daterange', the 'min_views' and 'max_views' limits, the age
    limit and the download archive. The 'match_filter' function is only
    consulted when incomplete is false.
    """
    params = self.params
    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = params.get('matchtitle', False)
        if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
            return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = params.get('rejecttitle', False)
        if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
            return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

    upload_date = info_dict.get('upload_date', None)
    if upload_date is not None:
        allowed_dates = params.get('daterange', DateRange())
        if upload_date not in allowed_dates:
            return '%s upload date is not in range %s' % (date_from_str(upload_date).isoformat(), allowed_dates)

    views = info_dict.get('view_count', None)
    if views is not None:
        fewest = params.get('min_views')
        if fewest is not None and views < fewest:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, views, fewest)
        most = params.get('max_views')
        if most is not None and views > most:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, views, most)

    if age_restricted(info_dict.get('age_limit'), params.get('age_limit')):
        return 'Skipping "%s" because it is age restricted' % video_title
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title

    if not incomplete:
        custom_filter = params.get('match_filter')
        if custom_filter is not None:
            verdict = custom_filter(info_dict)
            if verdict is not None:
                return verdict

    return None
fe7e0c98 632
b6c45014
JMF
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info that info_dict lacks'''
    for name, default in extra_info.items():
        if name not in info_dict:
            info_dict[name] = default
638
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    download: if true, the videos are downloaded too (passed on to
        process_ie_result).
    ie_key: only use the extractor registered under this key.
    extra_info: a dict containing the extra values to add to each result
        (None means no extra values).
    process: if false, the raw extractor result is returned without
        calling process_ie_result.
    force_generic_extractor: use the 'Generic' extractor when no ie_key
        is given.

    Returns the result of process_ie_result (or the raw extractor result
    if process is false). Returns None when the extractor already finished
    everything, when an error was reported instead of raised, or when no
    extractor is suitable for the URL.
    '''
    # A shared mutable default ({}) would be visible to every call;
    # create a fresh dictionary per call instead.
    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(error_to_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # Only reached when the loop was not left through break/return,
        # i.e. no extractor reported the URL as suitable.
        self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 691
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor and webpage fields that ie_result lacks.

    Sets 'extractor', 'webpage_url', 'webpage_url_basename' and
    'extractor_key' through add_extra_info(), so existing values are kept.
    """
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
699
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    ie_result is a dict whose '_type' key (default 'video') selects the
    handling: 'video', 'url', 'url_transparent', 'playlist',
    'multi_video' or 'compat_list'.  Any other value raises Exception.
    extra_info is an optional dict of additional fields passed on to the
    resolved results; None is treated as an empty dict.
    """
    # A fresh dict per call instead of a shared mutable default argument
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            # Flat extraction: the reference is returned unresolved
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # Every non-None field of the embedding result overrides the
        # extracted one, except the reference fields themselves
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        assert new_result.get('_type') != 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist' or result_type == 'multi_video':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items', None)
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(items_spec):
                # 'a-b' segments expand to the inclusive range a..b,
                # other segments are single 1-based item numbers
                for string_segment in items_spec.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = iter_playlistitems(playlistitems_str)

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = [
                    ie_entries[i - 1] for i in playlistitems
                    if -n_all_entries <= i - 1 < n_all_entries]
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))
        else:  # iterable
            if playlistitems:
                entry_list = list(ie_entries)
                n_all_entries = len(entry_list)
                # Skip out-of-range items (same rule as for list entries)
                # instead of failing with IndexError
                entries = [
                    entry_list[i - 1] for i in playlistitems
                    if -n_all_entries <= i - 1 < n_all_entries]
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Entries rejected by _match_entry are reported and left out
            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Copy the list-level extractor details onto the entry
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
864
67134eab
JMF
865 def _build_format_filter(self, filter_spec):
866 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
867
868 OPERATORS = {
869 '<': operator.lt,
870 '<=': operator.le,
871 '>': operator.gt,
872 '>=': operator.ge,
873 '=': operator.eq,
874 '!=': operator.ne,
875 }
67134eab 876 operator_rex = re.compile(r'''(?x)\s*
2ec19e95 877 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
083c9df9
PH
878 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
879 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
67134eab 880 $
083c9df9 881 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
67134eab 882 m = operator_rex.search(filter_spec)
9ddb6925
S
883 if m:
884 try:
885 comparison_value = int(m.group('value'))
886 except ValueError:
887 comparison_value = parse_filesize(m.group('value'))
888 if comparison_value is None:
889 comparison_value = parse_filesize(m.group('value') + 'B')
890 if comparison_value is None:
891 raise ValueError(
892 'Invalid value %r in format specification %r' % (
67134eab 893 m.group('value'), filter_spec))
9ddb6925
S
894 op = OPERATORS[m.group('op')]
895
083c9df9 896 if not m:
9ddb6925
S
897 STR_OPERATORS = {
898 '=': operator.eq,
899 '!=': operator.ne,
900 }
67134eab 901 str_operator_rex = re.compile(r'''(?x)
9ddb6925
S
902 \s*(?P<key>ext|acodec|vcodec|container|protocol)
903 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
904 \s*(?P<value>[a-zA-Z0-9_-]+)
67134eab 905 \s*$
9ddb6925 906 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
67134eab 907 m = str_operator_rex.search(filter_spec)
9ddb6925
S
908 if m:
909 comparison_value = m.group('value')
910 op = STR_OPERATORS[m.group('op')]
083c9df9 911
9ddb6925 912 if not m:
67134eab 913 raise ValueError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
914
915 def _filter(f):
916 actual_value = f.get(m.group('key'))
917 if actual_value is None:
918 return m.group('none_inclusive')
919 return op(actual_value, comparison_value)
67134eab
JMF
920 return _filter
921
def build_format_selector(self, format_spec):
    """Compile a format specification string into a selector function.

    format_spec is tokenized, parsed into a tree of FormatSelector nodes
    and turned into a callable that takes the list of available formats
    and returns an iterable of the chosen ones.  Raises SyntaxError for
    specifications that cannot be tokenized or parsed.
    """
    # Node kinds of the parsed selector tree
    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    def syntax_error(note, start):
        # start is a (row, column) pair; the caret goes under the column
        caret_padding = ' ' * start[1]
        return SyntaxError(
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, caret_padding))

    def _parse_filter(tokens):
        # Join the raw token text up to (not including) the closing ']'
        pieces = []
        for tok_type, tok_str, _, _, _ in tokens:
            if tok_type == tokenize.OP and tok_str == ']':
                return ''.join(pieces)
            pieces.append(tok_str)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        pending, pending_start, pending_end, pending_line = None, None, None, None
        for tok_type, tok_str, start, end, line in tokens:
            is_op = tok_type == tokenize.OP
            if is_op and tok_str == '[':
                if pending:
                    yield tokenize.NAME, pending, pending_start, pending_end, pending_line
                    pending = None
                yield tok_type, tok_str, start, end, line
                # everything inside brackets will be handled by _parse_filter,
                # so it is passed through untouched up to the closing ']'
                for tok_type, tok_str, start, end, line in tokens:
                    yield tok_type, tok_str, start, end, line
                    if tok_type == tokenize.OP and tok_str == ']':
                        break
            elif is_op and tok_str in ALLOWED_OPS:
                if pending:
                    yield tokenize.NAME, pending, pending_start, pending_end, pending_line
                    pending = None
                yield tok_type, tok_str, start, end, line
            elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not pending:
                    pending = tok_str
                    pending_start = start
                    pending_end = end
                else:
                    pending += tok_str
        if pending:
            yield tokenize.NAME, pending, pending_start, pending_end, pending_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        selectors = []
        current = None
        # ENCODING is only defined in python 3.x
        encoding_type = getattr(tokenize, 'ENCODING', None)
        for tok_type, tok_str, start, _, _ in tokens:
            if tok_type == encoding_type:
                continue
            if tok_type == tokenize.ENDMARKER:
                break
            if tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current = FormatSelector(SINGLE, tok_str, [])
                continue
            if tok_type != tokenize.OP:
                continue

            if tok_str == ')':
                if not inside_group:
                    # ')' will be handled by the parentheses group
                    tokens.restore_last_token()
                break
            if ((inside_merge and tok_str in ['/', ',']) or
                    (inside_choice and tok_str == ',')):
                # Give the separator back to the enclosing parser level
                tokens.restore_last_token()
                break

            if tok_str == ',':
                if not current:
                    raise syntax_error('"," must follow a format selector', start)
                selectors.append(current)
                current = None
            elif tok_str == '/':
                if not current:
                    raise syntax_error('"/" must follow a format selector', start)
                first_choice = current
                second_choice = _parse_format_selection(tokens, inside_choice=True)
                current = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
            elif tok_str == '[':
                if not current:
                    # A bare filter applies to 'best'
                    current = FormatSelector(SINGLE, 'best', [])
                current.filters.append(_parse_filter(tokens))
            elif tok_str == '(':
                if current:
                    raise syntax_error('Unexpected "("', start)
                group = _parse_format_selection(tokens, inside_group=True)
                current = FormatSelector(GROUP, group, [])
            elif tok_str == '+':
                video_selector = current
                audio_selector = _parse_format_selection(tokens, inside_merge=True)
                if not video_selector or not audio_selector:
                    raise syntax_error('"+" must be between two format selectors', start)
                current = FormatSelector(MERGE, (video_selector, audio_selector), [])
            else:
                raise syntax_error('Operator not recognized: "{0}"'.format(tok_str), start)
        if current:
            selectors.append(current)
        return selectors

    def _build_selector_function(selector):
        if isinstance(selector, list):
            sub_selectors = [_build_selector_function(s) for s in selector]

            def selector_function(formats):
                # Concatenate the picks of every comma-separated selector
                for select in sub_selectors:
                    for chosen in select(formats):
                        yield chosen
            return selector_function
        elif selector.type == GROUP:
            selector_function = _build_selector_function(selector.selector)
        elif selector.type == PICKFIRST:
            alternatives = [_build_selector_function(s) for s in selector.selector]

            def selector_function(formats):
                # The first alternative that yields anything wins
                for select in alternatives:
                    picked = list(select(formats))
                    if picked:
                        return picked
                return []
        elif selector.type == SINGLE:
            spec = selector.selector

            def selector_function(formats):
                formats = list(formats)
                if not formats:
                    return
                if spec == 'all':
                    for f in formats:
                        yield f
                elif spec in ['best', 'worst', None]:
                    idx = 0 if spec == 'worst' else -1
                    complete = [
                        f for f in formats
                        if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                    if complete:
                        yield complete[idx]
                    # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                    elif (all(f.get('acodec') != 'none' for f in formats) or
                          all(f.get('vcodec') != 'none' for f in formats)):
                        yield formats[idx]
                elif spec in ('bestaudio', 'worstaudio'):
                    audio_only = [f for f in formats if f.get('vcodec') == 'none']
                    if audio_only:
                        yield audio_only[-1 if spec == 'bestaudio' else 0]
                elif spec in ('bestvideo', 'worstvideo'):
                    video_only = [f for f in formats if f.get('acodec') == 'none']
                    if video_only:
                        yield video_only[-1 if spec == 'bestvideo' else 0]
                else:
                    # Known extensions select by 'ext', anything else by 'format_id'
                    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                    wanted_key = 'ext' if spec in extensions else 'format_id'
                    matches = [f for f in formats if f[wanted_key] == spec]
                    if matches:
                        yield matches[-1]
        elif selector.type == MERGE:
            def _merge(formats_info):
                format_1, format_2 = [f['format_id'] for f in formats_info]
                video_info, audio_info = formats_info[0], formats_info[1]
                # The first format must contain the video and the
                # second the audio
                if video_info.get('vcodec') == 'none':
                    self.report_error('The first format must '
                                      'contain the video, try using '
                                      '"-f %s+%s"' % (format_2, format_1))
                    return
                # Formats must be opposite (video+audio)
                if video_info.get('acodec') == 'none' and audio_info.get('acodec') == 'none':
                    self.report_error(
                        'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                        % (format_1, format_2))
                    return
                if self.params.get('merge_output_format') is None:
                    output_ext = video_info['ext']
                else:
                    output_ext = self.params['merge_output_format']
                return {
                    'requested_formats': formats_info,
                    'format': '%s+%s' % (video_info.get('format'),
                                         audio_info.get('format')),
                    'format_id': '%s+%s' % (video_info.get('format_id'),
                                            audio_info.get('format_id')),
                    'width': video_info.get('width'),
                    'height': video_info.get('height'),
                    'resolution': video_info.get('resolution'),
                    'fps': video_info.get('fps'),
                    'vcodec': video_info.get('vcodec'),
                    'vbr': video_info.get('vbr'),
                    'stretched_ratio': video_info.get('stretched_ratio'),
                    'acodec': audio_info.get('acodec'),
                    'abr': audio_info.get('abr'),
                    'ext': output_ext,
                }
            video_selector, audio_selector = map(_build_selector_function, selector.selector)

            def selector_function(formats):
                formats = list(formats)
                for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(formats):
            # Apply the bracket filters first, then the selector itself
            for _filter in filters:
                formats = list(filter(_filter, formats))
            return selector_function(formats)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        """Iterator over a token list that can step back by one token."""

        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(TokenIterator(tokens))
    return _build_selector_function(parsed_selector)
a9c58ad9 1184
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for info_dict.

    Starts from a copy of std_headers, overlays info_dict's
    'http_headers' (if any) and sets 'Cookie' when self._calc_cookies
    returns a non-empty value.
    """
    headers = std_headers.copy()
    headers.update(info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header
    return headers
1197
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header self.cookiejar adds for info_dict['url']."""
    probe_request = sanitized_Request(info_dict['url'])
    # The jar writes its cookie header onto the throwaway request
    self.cookiejar.add_cookie_header(probe_request)
    return probe_request.get_header('Cookie')
e5660ee6 1202
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Normalize a single video result, select its formats and process them.

    info_dict is modified in place: missing playlist, thumbnail,
    display_id, upload_date, subtitle and per-format fields are filled
    in.  The formats matching the requested format specification are
    then selected and, if download is true, each one is passed to
    self.process_info.

    Returns info_dict updated with the last selected format, or None
    when only a listing (subtitles, formats or thumbnails) was requested.
    Raises ExtractorError if 'id' or 'title' is missing, no formats are
    found, a format lacks 'url' or no format matches the request.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        def _none_first(value):
            # None cannot be ordered against numbers or strings on
            # Python 3; pairing each value with a presence flag sorts
            # missing values first without ever comparing None itself
            return (value is not None, value)

        thumbnails.sort(key=lambda t: (
            _none_first(t.get('preference')), _none_first(t.get('width')),
            _none_first(t.get('height')), _none_first(t.get('id')),
            _none_first(t.get('url'))))
        for i, t in enumerate(thumbnails):
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                t['id'] = '%d' % i

    if thumbnails and 'thumbnail' not in info_dict:
        # The last thumbnail after sorting becomes the default one
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    subtitles = info_dict.get('subtitles')
    if subtitles:
        for _, subtitle in subtitles.items():
            for subtitle_format in subtitle:
                if 'ext' not in subtitle_format:
                    subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return
    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles,
        info_dict.get('automatic_captions'))

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if 'url' not in fmt:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        formats_dict.setdefault(fmt['format_id'], []).append(fmt)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, fmt in enumerate(ambiguous_formats):
                fmt['format_id'] = '%s-%d' % (format_id, i)

    for fmt in formats:
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        # No explicit request: 'best', preceded by 'bestvideo+bestaudio'
        # when the conditions below hold and the merger is usable
        req_format_list = []
        if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                info_dict['extractor'] in ['youtube', 'ted'] and
                not info_dict.get('is_live')):
            merger = FFmpegMergerPP(self)
            if merger.available and merger.can_merge():
                req_format_list.append('bestvideo+bestaudio')
        req_format_list.append('best')
        req_format = '/'.join(req_format_list)
    format_selector = self.build_format_selector(req_format)
    formats_to_download = list(format_selector(formats))
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
1350
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Pick the subtitle languages and, per language, the format to use.

    Regular subtitles are considered when the 'writesubtitles' param is
    set, automatic captions when 'writeautomaticsub' is set; regular
    subtitles take precedence for a language present in both.  Returns a
    dict mapping language to the chosen format entry, or None if nothing
    was requested or nothing is available.
    """
    want_subs = self.params.get('writesubtitles')
    want_auto = self.params.get('writeautomaticsub')

    available_subs = {}
    if normal_subtitles and want_subs:
        available_subs.update(normal_subtitles)
    if automatic_captions and want_auto:
        for lang, cap_info in automatic_captions.items():
            # Never override a regular subtitle track
            available_subs.setdefault(lang, cap_info)

    if not (want_subs or want_auto) or not available_subs:
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = available_subs.keys()
    elif self.params.get('subtitleslangs', False):
        requested_langs = self.params.get('subtitleslangs')
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        # Fall back to the first available language
        requested_langs = [next(iter(available_subs))]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []

    chosen_subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        if formats is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [candidate for candidate in formats if candidate['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # No preferred format matched: use the last one and warn
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        chosen_subs[lang] = chosen
    return chosen_subs
1399
8222d8de
JMF
1400 def process_info(self, info_dict):
1401 """Process a single resolved IE result."""
1402
1403 assert info_dict.get('_type', 'video') == 'video'
fd288278
PH
1404
1405 max_downloads = self.params.get('max_downloads')
1406 if max_downloads is not None:
1407 if self._num_downloads >= int(max_downloads):
1408 raise MaxDownloadsReached()
8222d8de
JMF
1409
1410 info_dict['fulltitle'] = info_dict['title']
1411 if len(info_dict['title']) > 200:
6febd1c1 1412 info_dict['title'] = info_dict['title'][:197] + '...'
8222d8de 1413
11b85ce6 1414 if 'format' not in info_dict:
8222d8de
JMF
1415 info_dict['format'] = info_dict['ext']
1416
442c37b7 1417 reason = self._match_entry(info_dict, incomplete=False)
8222d8de 1418 if reason is not None:
6febd1c1 1419 self.to_screen('[download] ' + reason)
8222d8de
JMF
1420 return
1421
fd288278 1422 self._num_downloads += 1
8222d8de 1423
e72c7e41 1424 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
8222d8de
JMF
1425
1426 # Forced printings
1427 if self.params.get('forcetitle', False):
0783b09b 1428 self.to_stdout(info_dict['fulltitle'])
8222d8de 1429 if self.params.get('forceid', False):
0783b09b 1430 self.to_stdout(info_dict['id'])
8222d8de 1431 if self.params.get('forceurl', False):
16ae61f6 1432 if info_dict.get('requested_formats') is not None:
1433 for f in info_dict['requested_formats']:
1434 self.to_stdout(f['url'] + f.get('play_path', ''))
1435 else:
1436 # For RTMP URLs, also include the playpath
1437 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
216d71d0 1438 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
0783b09b 1439 self.to_stdout(info_dict['thumbnail'])
216d71d0 1440 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
0783b09b 1441 self.to_stdout(info_dict['description'])
8222d8de 1442 if self.params.get('forcefilename', False) and filename is not None:
0783b09b 1443 self.to_stdout(filename)
525ef922
PH
1444 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1445 self.to_stdout(formatSeconds(info_dict['duration']))
8222d8de 1446 if self.params.get('forceformat', False):
0783b09b 1447 self.to_stdout(info_dict['format'])
9d153818 1448 if self.params.get('forcejson', False):
0783b09b 1449 self.to_stdout(json.dumps(info_dict))
8222d8de
JMF
1450
1451 # Do nothing else if in simulate mode
1452 if self.params.get('simulate', False):
1453 return
1454
1455 if filename is None:
1456 return
1457
1458 try:
e5a11a22 1459 dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
d26e981d 1460 if dn and not os.path.exists(dn):
8222d8de
JMF
1461 os.makedirs(dn)
1462 except (OSError, IOError) as err:
7f8b2714 1463 self.report_error('unable to create directory ' + error_to_str(err))
8222d8de
JMF
1464 return
1465
1466 if self.params.get('writedescription', False):
2699da80 1467 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
7b6fefc9 1468 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
6febd1c1 1469 self.to_screen('[info] Video description is already present')
f00fd51d
JMF
1470 elif info_dict.get('description') is None:
1471 self.report_warning('There\'s no description to write.')
7b6fefc9
PH
1472 else:
1473 try:
6febd1c1 1474 self.to_screen('[info] Writing video description to: ' + descfn)
7b6fefc9
PH
1475 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1476 descfile.write(info_dict['description'])
7b6fefc9 1477 except (OSError, IOError):
6febd1c1 1478 self.report_error('Cannot write description file ' + descfn)
7b6fefc9 1479 return
8222d8de 1480
1fb07d10 1481 if self.params.get('writeannotations', False):
98727e12 1482 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
7b6fefc9 1483 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
6febd1c1 1484 self.to_screen('[info] Video annotations are already present')
7b6fefc9
PH
1485 else:
1486 try:
6febd1c1 1487 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
1488 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1489 annofile.write(info_dict['annotations'])
1490 except (KeyError, TypeError):
6febd1c1 1491 self.report_warning('There are no annotations to write.')
7b6fefc9 1492 except (OSError, IOError):
6febd1c1 1493 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 1494 return
1fb07d10 1495
c4a91be7 1496 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 1497 self.params.get('writeautomaticsub')])
c4a91be7 1498
c84dd8a9 1499 if subtitles_are_requested and info_dict.get('requested_subtitles'):
8222d8de
JMF
1500 # subtitles download errors are already managed as troubles in relevant IE
1501 # that way it will silently go on when used with unsupporting IE
c84dd8a9 1502 subtitles = info_dict['requested_subtitles']
0f2c0d33 1503 ie = self.get_info_extractor(info_dict['extractor_key'])
a504ced0
JMF
1504 for sub_lang, sub_info in subtitles.items():
1505 sub_format = sub_info['ext']
1506 if sub_info.get('data') is not None:
1507 sub_data = sub_info['data']
1508 else:
1509 try:
0f2c0d33
JMF
1510 sub_data = ie._download_webpage(
1511 sub_info['url'], info_dict['id'], note=False)
1512 except ExtractorError as err:
a504ced0 1513 self.report_warning('Unable to download subtitle for "%s": %s' %
0f2c0d33 1514 (sub_lang, compat_str(err.cause)))
a504ced0 1515 continue
8222d8de 1516 try:
d4051a8e 1517 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
7b6fefc9 1518 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
6febd1c1 1519 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
7b6fefc9 1520 else:
6febd1c1 1521 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
7b6fefc9 1522 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
a504ced0 1523 subfile.write(sub_data)
8222d8de 1524 except (OSError, IOError):
e4db1951 1525 self.report_error('Cannot write subtitles file ' + sub_filename)
8222d8de
JMF
1526 return
1527
8222d8de 1528 if self.params.get('writeinfojson', False):
b29e0000 1529 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
7b6fefc9 1530 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
6febd1c1 1531 self.to_screen('[info] Video description metadata is already present')
7b6fefc9 1532 else:
6febd1c1 1533 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
7b6fefc9 1534 try:
cb202fd2 1535 write_json_file(self.filter_requested_info(info_dict), infofn)
7b6fefc9 1536 except (OSError, IOError):
6febd1c1 1537 self.report_error('Cannot write metadata to JSON file ' + infofn)
7b6fefc9 1538 return
8222d8de 1539
ec82d85a 1540 self._write_thumbnails(info_dict, filename)
8222d8de
JMF
1541
1542 if not self.params.get('skip_download', False):
4340deca
P
1543 try:
1544 def dl(name, info):
a055469f 1545 fd = get_suitable_downloader(info, self.params)(self, self.params)
4340deca
P
1546 for ph in self._progress_hooks:
1547 fd.add_progress_hook(ph)
1548 if self.params.get('verbose'):
1549 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1550 return fd.download(name, info)
ee69b99a 1551
4340deca
P
1552 if info_dict.get('requested_formats') is not None:
1553 downloaded = []
1554 success = True
d47aeb22 1555 merger = FFmpegMergerPP(self)
f740fae2 1556 if not merger.available:
4340deca
P
1557 postprocessors = []
1558 self.report_warning('You have requested multiple '
1559 'formats but ffmpeg or avconv are not installed.'
4a5a898a 1560 ' The formats won\'t be merged.')
6350728b 1561 else:
4340deca 1562 postprocessors = [merger]
81cd954a
S
1563
def compatible_formats(formats):
    """Tell whether a (video, audio) format pair can share one container.

    `formats` must unpack into exactly two format dicts: the video format
    first, the audio format second.  The pair is considered compatible
    when both dicts carry an 'ext' and both extensions belong to the same
    family of COMPATIBLE_EXTS.  Returns True or False.
    """
    video, audio = formats
    # Check extension
    video_ext, audio_ext = video.get('ext'), audio.get('ext')
    if video_ext and audio_ext:
        COMPATIBLE_EXTS = (
            ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
            # The trailing comma matters: ('webm') is just the string
            # 'webm', which would turn the membership test below into a
            # substring test.
            ('webm',),
        )
        for exts in COMPATIBLE_EXTS:
            if video_ext in exts and audio_ext in exts:
                return True
    # TODO: Check acodec/vcodec
    return False
1578
38c6902b
S
1579 filename_real_ext = os.path.splitext(filename)[1][1:]
1580 filename_wo_ext = (
1581 os.path.splitext(filename)[0]
1582 if filename_real_ext == info_dict['ext']
1583 else filename)
81cd954a 1584 requested_formats = info_dict['requested_formats']
c0dea0a7 1585 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
38c6902b 1586 info_dict['ext'] = 'mkv'
4a5a898a
S
1587 self.report_warning(
1588 'Requested formats are incompatible for merge and will be merged into mkv.')
38c6902b
S
1589 # Ensure filename always has a correct extension for successful merge
1590 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
5b5fbc08
JMF
1591 if os.path.exists(encodeFilename(filename)):
1592 self.to_screen(
1593 '[download] %s has already been downloaded and '
1594 'merged' % filename)
1595 else:
81cd954a 1596 for f in requested_formats:
5b5fbc08
JMF
1597 new_info = dict(info_dict)
1598 new_info.update(f)
1599 fname = self.prepare_filename(new_info)
666a9a2b 1600 fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
5b5fbc08
JMF
1601 downloaded.append(fname)
1602 partial_success = dl(fname, new_info)
1603 success = success and partial_success
1604 info_dict['__postprocessors'] = postprocessors
1605 info_dict['__files_to_merge'] = downloaded
4340deca
P
1606 else:
1607 # Just a single file
1608 success = dl(filename, info_dict)
1609 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1610 self.report_error('unable to download video data: %s' % str(err))
1611 return
1612 except (OSError, IOError) as err:
1613 raise UnavailableVideoError(err)
1614 except (ContentTooShortError, ) as err:
1615 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1616 return
8222d8de
JMF
1617
1618 if success:
6271f1ca 1619 # Fixup content
62cd676c
PH
1620 fixup_policy = self.params.get('fixup')
1621 if fixup_policy is None:
1622 fixup_policy = 'detect_or_warn'
1623
6271f1ca
PH
1624 stretched_ratio = info_dict.get('stretched_ratio')
1625 if stretched_ratio is not None and stretched_ratio != 1:
6271f1ca
PH
1626 if fixup_policy == 'warn':
1627 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1628 info_dict['id'], stretched_ratio))
1629 elif fixup_policy == 'detect_or_warn':
1630 stretched_pp = FFmpegFixupStretchedPP(self)
1631 if stretched_pp.available:
1632 info_dict.setdefault('__postprocessors', [])
1633 info_dict['__postprocessors'].append(stretched_pp)
1634 else:
1635 self.report_warning(
1636 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1637 info_dict['id'], stretched_ratio))
1638 else:
62cd676c
PH
1639 assert fixup_policy in ('ignore', 'never')
1640
1641 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1642 if fixup_policy == 'warn':
1643 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1644 info_dict['id']))
1645 elif fixup_policy == 'detect_or_warn':
1646 fixup_pp = FFmpegFixupM4aPP(self)
1647 if fixup_pp.available:
1648 info_dict.setdefault('__postprocessors', [])
1649 info_dict['__postprocessors'].append(fixup_pp)
1650 else:
1651 self.report_warning(
1652 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1653 info_dict['id']))
1654 else:
1655 assert fixup_policy in ('ignore', 'never')
6271f1ca 1656
8222d8de
JMF
1657 try:
1658 self.post_process(filename, info_dict)
1659 except (PostProcessingError) as err:
6febd1c1 1660 self.report_error('postprocessing: %s' % str(err))
8222d8de 1661 return
cd58dc3e 1662 self.record_download_archive(info_dict)
8222d8de
JMF
1663
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when more than one URL is given, the output
    template contains no '%' placeholder and 'max_downloads' is not 1.
    Returns self._download_retcode once every URL has been handled.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    several_urls = len(url_list) > 1
    if several_urls and '%' not in outtmpl and self.params.get('max_downloads') != 1:
        raise SameFileError(outtmpl)

    for url in url_list:
        force_generic = self.params.get('force_generic_extractor', False)
        try:
            # extract_info also downloads the videos
            ie_result = self.extract_info(
                url, force_generic_extractor=force_generic)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(ie_result))

    return self._download_retcode
1687
def download_with_info_file(self, info_filename):
    """Run a download from a previously written info JSON file.

    The file is read as UTF-8, parsed as JSON and passed through
    filter_requested_info before being handed to process_ie_result.
    If that raises DownloadError and the info carries a 'webpage_url',
    a warning is reported and the result of download() on that URL is
    returned; without a 'webpage_url' the error is re-raised.
    Otherwise returns self._download_retcode.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_json = '\n'.join(lines)
        info = self.filter_requested_info(json.loads(raw_json))

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode
1dcc4c0c 1704
cb202fd2
S
@staticmethod
def filter_requested_info(info_dict):
    """Return a new dict with the items of info_dict, minus the
    'requested_formats' and 'requested_subtitles' keys.

    info_dict itself is left untouched.
    """
    filtered = dict(info_dict.items())
    for dropped_key in ('requested_formats', 'requested_subtitles'):
        filtered.pop(dropped_key, None)
    return filtered
1710
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    A copy of ie_info (with 'filepath' set to filename) is threaded
    through the postprocessors listed under ie_info['__postprocessors'],
    if any, followed by those in self._pps.  Each run() returns the files
    that may be deleted plus the updated info; those files are removed
    unless the 'keepvideo' option is set.  A PostProcessingError from a
    postprocessor is reported and the chain carries on with the next one.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    per_video_pps = ie_info.get('__postprocessors')
    chain = list(per_video_pps) if per_video_pps is not None else []
    chain.extend(self._pps)

    for processor in chain:
        try:
            obsolete_files, info = processor.run(info)
        except PostProcessingError as err:
            self.report_error(err.msg)
            continue
        if not obsolete_files or self.params.get('keepvideo', False):
            continue
        for stale_path in obsolete_files:
            self.to_screen('Deleting original file %s (pass -k to keep)' % stale_path)
            try:
                os.remove(encodeFilename(stale_path))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
c1c9a79c 1732
5db07df6
PH
1733 def _make_archive_id(self, info_dict):
1734 # Future-proof against any change in case
1735 # and backwards compatibility with prior versions
d31209a1 1736 extractor = info_dict.get('extractor_key')
7012b23c
PH
1737 if extractor is None:
1738 if 'id' in info_dict:
1739 extractor = info_dict.get('ie_key') # key in a playlist
1740 if extractor is None:
5db07df6 1741 return None # Incomplete video information
6febd1c1 1742 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
1743
def in_download_archive(self, info_dict):
    """Tell whether the video described by info_dict is already recorded
    in the file named by the 'download_archive' option.

    Returns False when no archive is configured, when no archive id can
    be built for info_dict, or when the archive file does not exist.
    Any other IOError while reading the archive is re-raised.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    entry = self._make_archive_id(info_dict)
    if entry is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            return any(line.strip() == entry for line in archive_file)
    except IOError as ioe:
        # A missing archive file simply means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
1762
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the file named by the
    'download_archive' option; do nothing when that option is unset.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    entry = self._make_archive_id(info_dict)
    assert entry
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(entry + '\n')
dd82ffea 1771
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short human-readable resolution string for a format dict.

    Checked in this order:
      * vcodec == 'none'        -> 'audio only'
      * explicit 'resolution'   -> returned unchanged
      * width and height        -> 'WIDTHxHEIGHT'
      * height only             -> 'HEIGHTp'
      * width only              -> 'WIDTHx?'
      * nothing known           -> `default`
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        # The width belongs on the left of the 'x', as in WIDTHxHEIGHT;
        # the unknown height is the part shown as '?'.
        return '%dx?' % width
    return default
1788
c57f7757
PH
1789 def _format_note(self, fdict):
1790 res = ''
1791 if fdict.get('ext') in ['f4f', 'f4m']:
1792 res += '(unsupported) '
1793 if fdict.get('format_note') is not None:
1794 res += fdict['format_note'] + ' '
1795 if fdict.get('tbr') is not None:
1796 res += '%4dk ' % fdict['tbr']
1797 if fdict.get('container') is not None:
1798 if res:
1799 res += ', '
1800 res += '%s container' % fdict['container']
1801 if (fdict.get('vcodec') is not None and
1802 fdict.get('vcodec') != 'none'):
1803 if res:
1804 res += ', '
1805 res += fdict['vcodec']
91c7271a 1806 if fdict.get('vbr') is not None:
c57f7757
PH
1807 res += '@'
1808 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1809 res += 'video@'
1810 if fdict.get('vbr') is not None:
1811 res += '%4dk' % fdict['vbr']
fbb21cf5
PH
1812 if fdict.get('fps') is not None:
1813 res += ', %sfps' % fdict['fps']
c57f7757
PH
1814 if fdict.get('acodec') is not None:
1815 if res:
1816 res += ', '
1817 if fdict['acodec'] == 'none':
1818 res += 'video only'
1819 else:
1820 res += '%-5s' % fdict['acodec']
1821 elif fdict.get('abr') is not None:
1822 if res:
1823 res += ', '
1824 res += 'audio'
1825 if fdict.get('abr') is not None:
1826 res += '@%3dk' % fdict['abr']
1827 if fdict.get('asr') is not None:
1828 res += ' (%5dHz)' % fdict['asr']
1829 if fdict.get('filesize') is not None:
1830 if res:
1831 res += ', '
1832 res += format_bytes(fdict['filesize'])
9732d77e
PH
1833 elif fdict.get('filesize_approx') is not None:
1834 if res:
1835 res += ', '
1836 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 1837 return res
91c7271a 1838
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    Uses info_dict['formats'] when present, otherwise treats info_dict
    itself as the only format.  Formats whose 'preference' is below
    -1000 are left out of the table.  When more than one format exists,
    the last listed row is tagged '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    # `table` can be empty even though several formats exist (all of them
    # filtered out above); indexing table[-1] would then raise IndexError.
    if len(formats) > 1 and table:
        best_row = table[-1]
        best_row[-1] += (' ' if best_row[-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
cfb56d1a
PH
1852
def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails known for info_dict.

    Falls back to the single 'thumbnail' URL (listed with id '0') when
    the 'thumbnails' list is missing or empty; prints a notice and
    returns when neither is available.
    """
    thumbs = info_dict.get('thumbnails')
    if not thumbs:
        single_url = info_dict.get('thumbnail')
        if not single_url:
            self.to_screen(
                '[info] No thumbnails present for %s' % info_dict['id'])
            return
        thumbs = [{'id': '0', 'url': single_url}]

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    rows = [
        [t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']]
        for t in thumbs]
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
dca08720 1869
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the subtitle languages available for video_id.

    `subtitles` maps a language to a list of dicts that each carry an
    'ext'; the extensions are listed per language in reverse order.
    `name` is the label used in the printed messages.  An empty or
    missing `subtitles` only prints a "has no" notice.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen('Available %s for %s:' % (name, video_id))
    rows = [
        [lang, ', '.join(fmt['ext'] for fmt in reversed(formats))]
        for lang, formats in subtitles.items()]
    self.to_screen(render_table(['Language', 'formats'], rows))
a504ced0 1880
dca08720
PH
def urlopen(self, req):
    """ Start an HTTP download

    `req` may be a URL string, which is first wrapped with
    sanitized_Request, or an already built request object.  The request
    is opened through self._opener using self._socket_timeout.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
1886
def print_debug_header(self):
    """Write diagnostic information; does nothing unless 'verbose' is set.

    In order: encodings, youtube-dl version, git HEAD (when it can be
    determined), Python version and platform, external program versions
    and the proxy map.  With the 'call_home' option it also fetches and
    prints the public IP address and warns when a newer version exists.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    locale_encoding = locale.getpreferredencoding()
    fs_encoding = sys.getfilesystemencoding()
    preferred = self.get_encoding()
    write_string(
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale_encoding, fs_encoding, stdout_encoding, preferred),
        encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        git_proc = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        head_rev = git_proc.communicate()[0].decode().strip()
        if re.match('[0-9a-f]+', head_rev):
            self._write_string('[debug] Git HEAD: ' + head_rev + '\n')
    except Exception:
        # Best effort only: not being able to query git is not an error.
        # sys.exc_clear() does not exist on every Python version, hence
        # the nested guard.
        try:
            sys.exc_clear()
        except Exception:
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    # Only programs with a truthy version value are listed
    found = [
        '%s %s' % (exe, version)
        for exe, version in sorted(exe_versions.items())
        if version]
    exe_str = ', '.join(found) or 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if not self.params.get('call_home', False):
        return
    ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
    self._write_string('[debug] Public IP address: %s\n' % ipaddr)
    latest_version = self.urlopen(
        'https://yt-dl.org/latest/version').read().decode('utf-8')
    if version_tuple(latest_version) > version_tuple(__version__):
        self.report_warning(
            'You are using an outdated version (newest version: %s)! '
            'See https://yt-dl.org/update if you need help updating.' %
            latest_version)
1951
def _setup_opener(self):
    """Build the URL opener and store it as self._opener.

    Also sets self._socket_timeout (from the 'socket_timeout' option,
    600 when unset) and self.cookiejar (loaded from the 'cookiefile'
    option when that file is readable).  Proxies come from the 'proxy'
    option: an empty string disables proxying, an unset option falls back
    to compat_urllib_request.getproxies().
    """
    configured_timeout = self.params.get('socket_timeout')
    if configured_timeout is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(configured_timeout)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    data_handler = compat_urllib_request_DataHandler()
    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh, data_handler)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
1992
def encode(self, s):
    """Encode s with the encoding returned by get_encoding().

    Bytes are returned unchanged.  On UnicodeEncodeError a hint about the
    --encoding option is appended to the error's reason before it is
    re-raised.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        encoded = s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise
    return encoded
2002
def get_encoding(self):
    """Return the 'encoding' option, or preferredencoding() when the
    option is unset (None).
    """
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a
PH
2008
2009 def _write_thumbnails(self, info_dict, filename):
2010 if self.params.get('writethumbnail', False):
2011 thumbnails = info_dict.get('thumbnails')
2012 if thumbnails:
2013 thumbnails = [thumbnails[-1]]
2014 elif self.params.get('write_all_thumbnails', False):
2015 thumbnails = info_dict.get('thumbnails')
2016 else:
2017 return
2018
2019 if not thumbnails:
2020 # No thumbnails present, so return immediately
2021 return
2022
2023 for t in thumbnails:
2024 thumb_ext = determine_ext(t['url'], 'jpg')
2025 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2026 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
82245a6d 2027 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
ec82d85a
PH
2028
2029 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2030 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2031 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2032 else:
2033 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2034 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2035 try:
2036 uf = self.urlopen(t['url'])
d3d89c32 2037 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a
PH
2038 shutil.copyfileobj(uf, thumbf)
2039 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2040 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2041 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2042 self.report_warning('Unable to download thumbnail "%s": %s' %
7f8b2714 2043 (t['url'], error_to_str(err)))