]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
[utils] Add cookie processor for cookie correction (Closes #6769)
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
9d2ecdbc 8import datetime
c1c9a79c 9import errno
31bd3925 10import fileinput
8222d8de 11import io
b82f815f 12import itertools
8694c600 13import json
62fec3b2 14import locale
083c9df9 15import operator
8222d8de 16import os
dca08720 17import platform
8222d8de
JMF
18import re
19import shutil
dca08720 20import subprocess
8222d8de
JMF
21import socket
22import sys
23import time
67134eab 24import tokenize
8222d8de
JMF
25import traceback
26
1e5b9a95
PH
27if os.name == 'nt':
28 import ctypes
29
8c25f81b 30from .compat import (
dca08720 31 compat_cookiejar,
4644ac55 32 compat_expanduser,
003c69a8 33 compat_get_terminal_size,
ce02ed60 34 compat_http_client,
4f026faf 35 compat_kwargs,
ce02ed60 36 compat_str,
67134eab 37 compat_tokenize_tokenize,
ce02ed60
PH
38 compat_urllib_error,
39 compat_urllib_request,
8c25f81b
PH
40)
41from .utils import (
ce02ed60
PH
42 ContentTooShortError,
43 date_from_str,
44 DateRange,
acd69589 45 DEFAULT_OUTTMPL,
ce02ed60
PH
46 determine_ext,
47 DownloadError,
48 encodeFilename,
49 ExtractorError,
02dbf93f 50 format_bytes,
525ef922 51 formatSeconds,
ce02ed60 52 locked_file,
dca08720 53 make_HTTPS_handler,
ce02ed60 54 MaxDownloadsReached,
b7ab0590 55 PagedList,
083c9df9 56 parse_filesize,
91410c9b 57 PerRequestProxyHandler,
ce02ed60 58 PostProcessingError,
dca08720 59 platform_name,
ce02ed60 60 preferredencoding,
cfb56d1a 61 render_table,
ce02ed60
PH
62 SameFileError,
63 sanitize_filename,
1bb5c511 64 sanitize_path,
e5660ee6 65 std_headers,
ce02ed60 66 subtitles_filename,
ce02ed60 67 UnavailableVideoError,
29eb5174 68 url_basename,
58b1f00d 69 version_tuple,
ce02ed60
PH
70 write_json_file,
71 write_string,
dca08720 72 YoutubeDLHandler,
6350728b 73 prepend_extension,
b29e0000 74 replace_extension,
7d4111ed 75 args_to_str,
05900629 76 age_restricted,
ce02ed60 77)
a0e07d31 78from .cache import Cache
023fa8c4 79from .extractor import get_info_extractor, gen_extractors
3bc2ddcc 80from .downloader import get_suitable_downloader
4c83c967 81from .downloader.rtmp import rtmpdump_version
4f026faf 82from .postprocessor import (
62cd676c 83 FFmpegFixupM4aPP,
6271f1ca 84 FFmpegFixupStretchedPP,
4f026faf
PH
85 FFmpegMergerPP,
86 FFmpegPostProcessor,
87 get_postprocessor,
88)
dca08720 89from .version import __version__
8222d8de
JMF
90
91
92class YoutubeDL(object):
93 """YoutubeDL class.
94
95 YoutubeDL objects are the ones responsible for downloading the
96 actual video file and writing it to disk if the user has requested
97 it, among some other tasks. In most cases there should be one per
98 program. As, given a video URL, the downloader doesn't know how to
99 extract all the needed information, task that InfoExtractors do, it
100 has to pass the URL to one of them.
101
102 For this, YoutubeDL objects have a method that allows
103 InfoExtractors to be registered in a given order. When it is passed
104 a URL, the YoutubeDL object hands it to the first InfoExtractor it
105 finds that reports being able to handle it. The InfoExtractor extracts
106 all the information about the video or videos the URL refers to, and
107 YoutubeDL processes the extracted information, possibly using a File
108 Downloader to download the video.
109
110 YoutubeDL objects accept a lot of parameters. In order not to saturate
111 the object constructor with arguments, it receives a dictionary of
112 options instead. These options are available through the params
113 attribute for the InfoExtractors to use. The YoutubeDL also
114 registers itself as the downloader in charge for the InfoExtractors
115 that are added to it, so this is a "mutual registration".
116
117 Available options:
118
119 username: Username for authentication purposes.
120 password: Password for authentication purposes.
180940e0 121 videopassword: Password for accessing a video.
8222d8de
JMF
122 usenetrc: Use netrc for authentication instead.
123 verbose: Print additional info to stdout.
124 quiet: Do not print messages to stdout.
ad8915b7 125 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
126 forceurl: Force printing final URL.
127 forcetitle: Force printing title.
128 forceid: Force printing ID.
129 forcethumbnail: Force printing thumbnail URL.
130 forcedescription: Force printing description.
131 forcefilename: Force printing final filename.
525ef922 132 forceduration: Force printing duration.
8694c600 133 forcejson: Force printing info_dict as JSON.
63e0be34
PH
134 dump_single_json: Force printing the info_dict of the whole playlist
135 (or video) as a single JSON line.
8222d8de 136 simulate: Do not download the video files.
d8600787 137 format: Video format code. See options.py for more information.
8222d8de
JMF
138 outtmpl: Template for output names.
139 restrictfilenames: Do not allow "&" and spaces in file names
140 ignoreerrors: Do not stop on download errors.
d22dec74 141 force_generic_extractor: Force downloader to use the generic extractor
8222d8de
JMF
142 nooverwrites: Prevent overwriting files.
143 playliststart: Playlist item to start at.
144 playlistend: Playlist item to end at.
c14e88f0 145 playlist_items: Specific indices of playlist to download.
ff815fe6 146 playlistreverse: Download playlist items in reverse order.
8222d8de
JMF
147 matchtitle: Download only matching titles.
148 rejecttitle: Reject downloads for matching titles.
8bf9319e 149 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
150 logtostderr: Log messages to stderr instead of stdout.
151 writedescription: Write the video description to a .description file
152 writeinfojson: Write the video description to a .info.json file
1fb07d10 153 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 154 writethumbnail: Write the thumbnail image to a file
ec82d85a 155 write_all_thumbnails: Write all thumbnail formats to files
8222d8de 156 writesubtitles: Write the video subtitles to a file
b004821f 157 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 158 allsubtitles: Downloads all the subtitles of the video
0b7f3118 159 (requires writesubtitles or writeautomaticsub)
8222d8de 160 listsubtitles: Lists all available subtitles for the video
a504ced0 161 subtitlesformat: The format code for subtitles
aa6a10c4 162 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
163 keepvideo: Keep the video file after post-processing
164 daterange: A DateRange object, download only if the upload_date is in the range.
165 skip_download: Skip the actual download of the video file
c35f9e72 166 cachedir: Location of the cache files in the filesystem.
a0e07d31 167 False to disable filesystem cache.
47192f92 168 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
169 age_limit: An integer representing the user's age in years.
170 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
171 min_views: An integer representing the minimum view count the video
172 must have in order to not be skipped.
173 Videos without view count information are always
174 downloaded. None for no limit.
175 max_views: An integer representing the maximum view count.
176 Videos that are more popular than that are not
177 downloaded.
178 Videos without view count information are always
179 downloaded. None for no limit.
180 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
181 Videos already present in the file are not downloaded
182 again.
dca08720 183 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 184 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
185 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
186 At the moment, this is only supported by YouTube.
a1ee09e8 187 proxy: URL of the proxy server to use
91410c9b
PH
188 cn_verification_proxy: URL of the proxy to use for IP address verification
189 on Chinese sites. (Experimental)
e344693b 190 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
191 bidi_workaround: Work around buggy terminals without bidirectional text
192 support, using fribidi
a0ddb8a2 193 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 194 include_ads: Download ads as well
04b4d394
PH
195 default_search: Prepend this string if an input url is not valid.
196 'auto' for elaborate guessing
62fec3b2 197 encoding: Use this encoding instead of the system-specified.
e8ee972c 198 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
199 Pass in 'in_playlist' to only show this behavior for
200 playlist items.
4f026faf 201 postprocessors: A list of dictionaries, each with an entry
71b640cc
PH
202 * key: The name of the postprocessor. See
203 youtube_dl/postprocessor/__init__.py for a list.
4f026faf
PH
204 as well as any further keyword arguments for the
205 postprocessor.
71b640cc
PH
206 progress_hooks: A list of functions that get called on download
207 progress, with a dictionary with the entries
5cda4eda 208 * status: One of "downloading", "error", or "finished".
ee69b99a 209 Check this first and ignore unknown values.
71b640cc 210
5cda4eda 211 If status is one of "downloading", or "finished", the
ee69b99a
PH
212 following properties may also be present:
213 * filename: The final filename (always present)
5cda4eda 214 * tmpfilename: The filename we're currently writing to
71b640cc
PH
215 * downloaded_bytes: Bytes on disk
216 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
217 * total_bytes_estimate: Guess of the eventual file size,
218 None if unavailable.
219 * elapsed: The number of seconds since download started.
71b640cc
PH
220 * eta: The estimated time in seconds, None if unknown
221 * speed: The download speed in bytes/second, None if
222 unknown
5cda4eda
PH
223 * fragment_index: The counter of the currently
224 downloaded video fragment.
225 * fragment_count: The number of fragments (= individual
226 files that will be merged)
71b640cc
PH
227
228 Progress hooks are guaranteed to be called at least once
229 (with status "finished") if the download is successful.
45598f15 230 merge_output_format: Extension to use when merging formats.
6271f1ca
PH
231 fixup: Automatically correct known faults of the file.
232 One of:
233 - "never": do nothing
234 - "warn": only emit a warning
235 - "detect_or_warn": check whether we can do anything
62cd676c 236 about it, warn otherwise (default)
be4a824d 237 source_address: (Experimental) Client-side IP address to bind to.
8bfa7545
PH
238 call_home: Boolean, true iff we are allowed to contact the
239 youtube-dl servers for debugging.
5f0d813d 240 sleep_interval: Number of seconds to sleep before each download.
cfb56d1a
PH
241 listformats: Print an overview of available video formats and exit.
242 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
243 match_filter: A function that gets called with the info_dict of
244 every video.
245 If it returns a message, the video is ignored.
246 If it returns None, the video is downloaded.
247 match_filter_func in utils.py is one example for this.
7e5db8c9 248 no_color: Do not emit color codes in output.
71b640cc 249
85729c51
PH
250 The following options determine which downloader is picked:
251 external_downloader: Executable of the external downloader to call.
252 None or unset for standard (built-in) downloader.
253 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
fe7e0c98 254
8222d8de 255 The following parameters are not used by YoutubeDL itself, they are used by
c75f0b36 256 the downloader (see youtube_dl/downloader/common.py):
8222d8de 257 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 258 noresizebuffer, retries, continuedl, noprogress, consoletitle,
c75f0b36 259 xattr_set_filesize, external_downloader_args.
76b1bd67
JMF
260
261 The following options are used by the post processors:
262 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
263 otherwise prefer avconv.
f72b0a60
S
264 postprocessor_args: A list of additional command-line arguments for the
265 postprocessor.
8222d8de
JMF
266 """
267
# Class-level defaults; __init__ rebinds every one of these on the instance.
params = None
_ies = []
_pps = []
_download_retcode = None
_num_downloads = None
_screen_file = None
274
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary (None is treated as an empty dict).
    When auto_init is true, the debug header is printed and the default
    info extractors are registered. Post processors listed under the
    'postprocessors' option and hooks listed under 'progress_hooks' are
    always registered.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Regular output goes to stderr instead of stdout when 'logtostderr' is set
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                # bidiv could not be started; try fribidi instead
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
            not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if isinstance(params.get('outtmpl'), bytes):
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        # Everything except 'key' is passed to the post processor constructor
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)
351
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn if any argument looks like a short YouTube ID starting with a dash.

    The warning shows a corrected command line in which those arguments
    are moved behind a '--' separator.
    """
    dash_id = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    suspects = [pos for pos, arg in enumerate(argv) if dash_id.match(arg)]
    if not suspects:
        return
    kept = [arg for pos, arg in enumerate(argv) if pos not in suspects]
    moved = [argv[pos] for pos in suspects]
    correct_argv = ['youtube-dl'] + kept + ['--'] + moved
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
367
8222d8de
JMF
def add_info_extractor(self, ie):
    """Append an InfoExtractor, index it by its key and attach this downloader."""
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
373
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.

    If no instance is registered yet, a new one is created from the class
    looked up by ie_key, added to the extractor list and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
385
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register each InfoExtractor yielded by gen_extractors, in order
    """
    register = self.add_info_extractor
    for extractor in gen_extractors():
        register(extractor)
392
8222d8de
JMF
def add_post_processor(self, pp):
    """Append a PostProcessor to the chain and attach this downloader to it."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
397
933605d7
JMF
def add_progress_hook(self, ph):
    """Register ph as a progress hook (currently only for the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 401
def _bidi_workaround(self, message):
    """Run message through the bidi helper process if one was set up.

    Returns message unchanged when no output channel exists.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    expected_lines = message.count('\n') + 1
    pipe = self._output_process.stdin
    pipe.write((message + '\n').encode('utf-8'))
    pipe.flush()
    # Read back as many lines as were sent
    converted = []
    for _ in range(expected_lines):
        converted.append(self._output_channel.readline().decode('utf-8'))
    # Strip the final newline again
    return ''.join(converted)[:-len('\n')]
1c088fa8 414
def to_screen(self, message, skip_eol=False):
    """Forward message to to_stdout with the quiet check enabled."""
    outcome = self.to_stdout(message, skip_eol, check_quiet=True)
    return outcome
418
def _write_string(self, s, out=None):
    """Write s to out via write_string, using the 'encoding' option."""
    chosen_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=chosen_encoding)
734f90bb 421
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or hand it to the configured logger.

    If the 'logger' option is set, the message goes to its debug() method
    and nothing is written. Otherwise the message is written unless
    check_quiet is true and the 'quiet' option is set. A newline is
    appended unless skip_eol is true.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
    elif not check_quiet or not self.params.get('quiet', False):
        message = self._bidi_workaround(message)
        # Conditional expression instead of indexing a list with skip_eol,
        # so any truthy/falsy value works, not just True/False/0/1
        terminator = '' if skip_eol else '\n'
        output = message + terminator

        self._write_string(output, self._screen_file)
8222d8de
JMF
432
def to_stderr(self, message):
    """Send message to the logger's error() if one is set, else to the error file."""
    assert isinstance(message, compat_str)
    if not self.params.get('logger'):
        converted = self._bidi_workaround(message)
        self._write_string(converted + '\n', self._err_file)
        return
    self.params['logger'].error(message)
8222d8de 442
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' option is on."""
    enabled = self.params.get('consoletitle', False)
    if not enabled:
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        title = ctypes.c_wchar_p(message)
        ctypes.windll.kernel32.SetConsoleTitleW(title)
    elif 'TERM' in os.environ:
        sequence = '\033]0;%s\007' % message
        self._write_string(sequence, self._screen_file)
1e5b9a95 452
bdde425c
PH
def save_console_title(self):
    """Emit the escape sequence that saves the title, if 'consoletitle' is on."""
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
459
def restore_console_title(self):
    """Emit the escape sequence that restores the title, if 'consoletitle' is on."""
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
466
def __enter__(self):
    """Save the console title and return this object as the context value."""
    self.save_console_title()
    return self
470
def __exit__(self, *args):
    """Restore the console title and save the cookie jar if 'cookiefile' is set."""
    self.restore_console_title()

    cookie_path = self.params.get('cookiefile')
    if cookie_path is None:
        return
    self.cookiejar.save()
bdde425c 476
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr; in verbose mode a traceback
    is printed too. Unless the 'ignoreerrors' option is set, DownloadError
    is raised; otherwise the download return code is set to 1.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        if tb is None:
            active = sys.exc_info()
            if active[0]:  # called from inside an except block
                parts = []
                if hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
                    # The exception carries the exc_info of an earlier one
                    parts.append(''.join(traceback.format_exception(*active[1].exc_info)))
                parts.append(compat_str(traceback.format_exc()))
                tb = ''.join(parts)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)
    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return
    active = sys.exc_info()
    if active[0] and hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
        exc_info = active[1].exc_info
    else:
        exc_info = active
    raise DownloadError(message, exc_info)
506
def report_warning(self, message):
    '''
    Emit a warning: via the logger's warning() if a logger is set,
    otherwise on stderr prefixed with 'WARNING:' (skipped entirely when
    the 'no_warnings' option is set). The prefix is colored when stderr
    is a tty, colors are not disabled and the OS is not Windows.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    colored = (not self.params.get('no_color') and
               self._err_file.isatty() and os.name != 'nt')
    header = '\033[0;33mWARNING:\033[0m' if colored else 'WARNING:'
    self.to_stderr('%s %s' % (header, message))
8222d8de
JMF
523
def report_error(self, message, tb=None):
    '''
    Call trouble() with the message prefixed by 'ERROR:'. The prefix is
    colored red when stderr is a tty, colors are not disabled and the OS
    is not Windows.
    '''
    colored = (not self.params.get('no_color') and
               self._err_file.isatty() and os.name != 'nt')
    header = '\033[0;31mERROR:\033[0m' if colored else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
535
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name has already been fully downloaded."""
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        # Fall back to a message that does not contain the file name
        self.to_screen('[download] The file has already been downloaded')
8222d8de 542
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Expand the 'outtmpl' output template with the fields of info_dict.

    Returns the resulting filename, or None after reporting an error when
    a ValueError is raised while building it.
    """
    try:
        fields = dict(info_dict)

        fields['epoch'] = int(time.time())
        digits = self.params.get('autonumber_size')
        if digits is None:
            digits = 5
        fields['autonumber'] = ('%0' + str(digits) + 'd') % self._num_downloads
        if fields.get('playlist_index') is not None:
            # Zero-pad the index to the number of digits in n_entries
            pad = len(str(fields['n_entries']))
            fields['playlist_index'] = '%0*d' % (pad, fields['playlist_index'])
        if fields.get('resolution') is None:
            width, height = fields.get('width'), fields.get('height')
            if width and height:
                fields['resolution'] = '%dx%d' % (width, height)
            elif height:
                fields['resolution'] = '%sp' % height
            elif width:
                fields['resolution'] = '?x%d' % width

        restricted = self.params.get('restrictfilenames')

        def clean(key, value):
            return sanitize_filename(
                compat_str(value), restricted=restricted, is_id=(key == 'id'))

        # Fields that are missing or None expand to 'NA' in the template
        sanitized = collections.defaultdict(lambda: 'NA')
        for key, value in fields.items():
            if value is not None:
                sanitized[key] = clean(key, value)

        outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
        filename = compat_expanduser(outtmpl) % sanitized
        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
585
def _match_entry(self, info_dict, incomplete):
    """Return None iff the video should be downloaded, else the reason to skip it.

    The 'match_filter' option is only consulted when incomplete is false.
    """
    params = self.params
    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = params.get('matchtitle', False)
        if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
            return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = params.get('rejecttitle', False)
        if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
            return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)

    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

    if age_restricted(info_dict.get('age_limit'), params.get('age_limit')):
        return 'Skipping "%s" because it is age restricted' % video_title
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title

    if incomplete:
        return None
    match_filter = params.get('match_filter')
    if match_filter is None:
        return None
    # A non-None return value from the filter is the skip message
    verdict = match_filter(info_dict)
    if verdict is not None:
        return verdict

    return None
fe7e0c98 627
b6c45014
JMF
628 @staticmethod
629 def add_extra_info(info_dict, extra_info):
630 '''Set the keys from extra_info in info dict if they are missing'''
631 for key, value in extra_info.items():
632 info_dict.setdefault(key, value)
633
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    download is passed on to process_ie_result.
    ie_key, if given, selects the extractor to use instead of trying all
    registered ones; force_generic_extractor selects 'Generic' when no
    ie_key is given.
    extra_info is a dict containing the extra values to add to each result
    (None means no extra values).
    If process is true, the return value is that of process_ie_result;
    otherwise the extractor result is returned as is. Nothing (None) is
    returned when the extractor returned None, when an error was reported
    without raising, or when no extractor is suitable for the URL.
    '''
    if extra_info is None:
        # A fresh dict per call rather than a default shared between calls
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without any extractor accepting the URL
        self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 686
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in extractor and webpage URL fields that ie_result does not have yet."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
694
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    ie_result  -- dict returned by an extractor; its '_type' key ('video'
                  when absent) selects how it is resolved.
    download   -- forwarded to the video / nested result processing.
    extra_info -- dict of additional fields passed on to every resolved
                  result (None is treated as an empty dict).

    Raises Exception for an unknown '_type'.
    """
    # Avoid sharing one mutable default dict between calls.
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            # Flat extraction: hand back the unresolved reference as is
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # Every non-None field of the embedding result overrides the
        # extracted one, except the reference fields themselves
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        assert new_result.get('_type') != 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist' or result_type == 'multi_video':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items', None)
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(spec):
                # 'a-b' segments expand to the inclusive range a..b
                for string_segment in spec.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = iter_playlistitems(playlistitems_str)

        def pick_playlistitems(entry_list):
            # Items are 1-based; indices outside of the list are skipped
            # instead of raising IndexError
            num_entries = len(entry_list)
            return [
                entry_list[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = pick_playlistitems(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))
        else:  # iterable
            if playlistitems:
                # Same bounds handling as for plain lists
                entries = pick_playlistitems(list(ie_entries))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Entries rejected by the match filters are skipped entirely
            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
858
67134eab
JMF
859 def _build_format_filter(self, filter_spec):
860 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
861
862 OPERATORS = {
863 '<': operator.lt,
864 '<=': operator.le,
865 '>': operator.gt,
866 '>=': operator.ge,
867 '=': operator.eq,
868 '!=': operator.ne,
869 }
67134eab 870 operator_rex = re.compile(r'''(?x)\s*
2ec19e95 871 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
083c9df9
PH
872 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
873 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
67134eab 874 $
083c9df9 875 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
67134eab 876 m = operator_rex.search(filter_spec)
9ddb6925
S
877 if m:
878 try:
879 comparison_value = int(m.group('value'))
880 except ValueError:
881 comparison_value = parse_filesize(m.group('value'))
882 if comparison_value is None:
883 comparison_value = parse_filesize(m.group('value') + 'B')
884 if comparison_value is None:
885 raise ValueError(
886 'Invalid value %r in format specification %r' % (
67134eab 887 m.group('value'), filter_spec))
9ddb6925
S
888 op = OPERATORS[m.group('op')]
889
083c9df9 890 if not m:
9ddb6925
S
891 STR_OPERATORS = {
892 '=': operator.eq,
893 '!=': operator.ne,
894 }
67134eab 895 str_operator_rex = re.compile(r'''(?x)
9ddb6925
S
896 \s*(?P<key>ext|acodec|vcodec|container|protocol)
897 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
898 \s*(?P<value>[a-zA-Z0-9_-]+)
67134eab 899 \s*$
9ddb6925 900 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
67134eab 901 m = str_operator_rex.search(filter_spec)
9ddb6925
S
902 if m:
903 comparison_value = m.group('value')
904 op = STR_OPERATORS[m.group('op')]
083c9df9 905
9ddb6925 906 if not m:
67134eab 907 raise ValueError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
908
909 def _filter(f):
910 actual_value = f.get(m.group('key'))
911 if actual_value is None:
912 return m.group('none_inclusive')
913 return op(actual_value, comparison_value)
67134eab
JMF
914 return _filter
915
def build_format_selector(self, format_spec):
    """
    Compile a format specification into a selector function.

    format_spec is tokenized with the Python tokenizer and parsed into a
    tree of selectors:
        ','  several selections (all of them are yielded)
        '/'  alternatives (the first one yielding something is used)
        '+'  merge of a video and an audio selection
        '()' grouping
        '[]' filter, handled by self._build_format_filter

    Returns a function that takes the list of available formats and
    returns an iterable of the selected format dicts.
    Raises SyntaxError for a malformed specification.
    """
    def syntax_error(note, start):
        # start is a (row, column) token position used to place the caret
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    def _parse_filter(tokens):
        # Collect the raw text up to the closing bracket
        filter_parts = []
        for token_type, string, start, _, _ in tokens:
            if token_type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for token_type, string, start, end, line in tokens:
            if token_type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield token_type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                # (tokens is an iterator, so this consumes from the same stream)
                for token_type, string, start, end, line in tokens:
                    yield token_type, string, start, end, line
                    if token_type == tokenize.OP and string == ']':
                        break
            elif token_type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield token_type, string, start, end, line
            elif token_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                    last_line = line
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        selectors = []
        current_selector = None
        for token_type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if token_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif token_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif token_type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    # '+' binds tighter: let the caller handle the operator
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        # A bare filter applies to 'best'
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    video_selector = current_selector
                    audio_selector = _parse_format_selection(tokens, inside_merge=True)
                    if not video_selector or not audio_selector:
                        raise syntax_error('"+" must be between two format selectors', start)
                    current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif token_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _build_selector_function(selector):
        if isinstance(selector, list):
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(formats):
                for f in fs:
                    for picked in f(formats):
                        yield picked
            return selector_function
        elif selector.type == GROUP:
            selector_function = _build_selector_function(selector.selector)
        elif selector.type == PICKFIRST:
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(formats):
                for f in fs:
                    picked_formats = list(f(formats))
                    if picked_formats:
                        return picked_formats
                return []
        elif selector.type == SINGLE:
            format_spec = selector.selector

            def selector_function(formats):
                formats = list(formats)
                if not formats:
                    return
                if format_spec == 'all':
                    for f in formats:
                        yield f
                elif format_spec in ['best', 'worst', None]:
                    format_idx = 0 if format_spec == 'worst' else -1
                    audiovideo_formats = [
                        f for f in formats
                        if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                    if audiovideo_formats:
                        yield audiovideo_formats[format_idx]
                    # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                    elif (all(f.get('acodec') != 'none' for f in formats) or
                          all(f.get('vcodec') != 'none' for f in formats)):
                        yield formats[format_idx]
                elif format_spec == 'bestaudio':
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[-1]
                elif format_spec == 'worstaudio':
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[0]
                elif format_spec == 'bestvideo':
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[-1]
                elif format_spec == 'worstvideo':
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[0]
                else:
                    # Either a well-known extension or an explicit format_id
                    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                    if format_spec in extensions:
                        filter_f = lambda f: f['ext'] == format_spec
                    else:
                        filter_f = lambda f: f['format_id'] == format_spec
                    matches = list(filter(filter_f, formats))
                    if matches:
                        yield matches[-1]
        elif selector.type == MERGE:
            def _merge(formats_info):
                format_1, format_2 = [f['format_id'] for f in formats_info]
                # The first format must contain the video and the
                # second the audio
                if formats_info[0].get('vcodec') == 'none':
                    self.report_error('The first format must '
                                      'contain the video, try using '
                                      '"-f %s+%s"' % (format_2, format_1))
                    return
                output_ext = (
                    formats_info[0]['ext']
                    if self.params.get('merge_output_format') is None
                    else self.params['merge_output_format'])
                return {
                    'requested_formats': formats_info,
                    'format': '%s+%s' % (formats_info[0].get('format'),
                                         formats_info[1].get('format')),
                    'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                            formats_info[1].get('format_id')),
                    'width': formats_info[0].get('width'),
                    'height': formats_info[0].get('height'),
                    'resolution': formats_info[0].get('resolution'),
                    'fps': formats_info[0].get('fps'),
                    'vcodec': formats_info[0].get('vcodec'),
                    'vbr': formats_info[0].get('vbr'),
                    'stretched_ratio': formats_info[0].get('stretched_ratio'),
                    'acodec': formats_info[1].get('acodec'),
                    'abr': formats_info[1].get('abr'),
                    'ext': output_ext,
                }
            video_selector, audio_selector = map(_build_selector_function, selector.selector)

            def selector_function(formats):
                formats = list(formats)
                for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                    merged = _merge(pair)
                    # _merge() returns None after reporting an invalid
                    # pair; never hand None to callers as a "format"
                    if merged is not None:
                        yield merged

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(formats):
            # Filters narrow the candidates before the selector picks
            for _filter in filters:
                formats = list(filter(_filter, formats))
            return selector_function(formats)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        """Iterator over a token list that can step back by one token."""

        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 1172
e5660ee6
JMF
1173 def _calc_headers(self, info_dict):
1174 res = std_headers.copy()
1175
1176 add_headers = info_dict.get('http_headers')
1177 if add_headers:
1178 res.update(add_headers)
1179
1180 cookies = self._calc_cookies(info_dict)
1181 if cookies:
1182 res['Cookie'] = cookies
1183
1184 return res
1185
1186 def _calc_cookies(self, info_dict):
662435f7 1187 pr = compat_urllib_request.Request(info_dict['url'])
e5660ee6 1188 self.cookiejar.add_cookie_header(pr)
662435f7 1189 return pr.get_header('Cookie')
e5660ee6 1190
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """
    Normalize a 'video' result, select the formats to fetch and, if
    'download' is true, pass each of them to self.process_info.

    info_dict is modified in place (playlist fields, thumbnails,
    display_id, upload_date, requested_subtitles, per-format defaults) and
    finally updated with the last selected format before being returned.

    Returns None instead when only a listing (subtitles, formats or
    thumbnails) was requested through self.params.
    Raises ExtractorError for missing mandatory fields, an empty format
    list or when the requested format is not available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        # Missing values are replaced by neutral defaults: None cannot be
        # ordered against numbers/strings on Python 3
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '',
            t.get('url')))
        for i, t in enumerate(thumbnails):
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                t['id'] = '%d' % i

    if thumbnails and 'thumbnail' not in info_dict:
        # The list is sorted ascending, so the last one is the preferred
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
        self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
        return
    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], info_dict.get('subtitles'),
        info_dict.get('automatic_captions'))

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if 'url' not in fmt:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        format_id = fmt['format_id']
        if format_id not in formats_dict:
            formats_dict[format_id] = []
        formats_dict[format_id].append(fmt)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, fmt in enumerate(ambiguous_formats):
                fmt['format_id'] = '%s-%d' % (format_id, i)

    for i, fmt in enumerate(formats):
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        # Default: prefer merging separate streams where supported and
        # possible, fall back to the best single file
        req_format_list = []
        if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                info_dict['extractor'] in ['youtube', 'ted'] and
                not info_dict.get('is_live')):
            merger = FFmpegMergerPP(self)
            if merger.available and merger.can_merge():
                req_format_list.append('bestvideo+bestaudio')
        req_format_list.append('best')
        req_format = '/'.join(req_format_list)
    format_selector = self.build_format_selector(req_format)
    formats_to_download = list(format_selector(formats))
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
1331
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """
    Select the requested subtitles and their format.

    normal_subtitles and automatic_captions map a language to a list of
    subtitle format dicts; normal subtitles win over automatic captions
    for the same language.

    Returns a dict mapping each selected language to the chosen format
    dict, or None when no subtitles were requested or none are available.
    """
    params = self.params
    write_subs = params.get('writesubtitles')
    write_auto = params.get('writeautomaticsub')

    candidates = {}
    if normal_subtitles and write_subs:
        candidates.update(normal_subtitles)
    if automatic_captions and write_auto:
        for language, captions in automatic_captions.items():
            # Never override normal subtitles of the same language
            candidates.setdefault(language, captions)

    if not candidates or not (write_subs or write_auto):
        return None

    if params.get('allsubtitles', False):
        wanted_languages = candidates.keys()
    elif params.get('subtitleslangs', False):
        wanted_languages = params.get('subtitleslangs')
    elif 'en' in candidates:
        wanted_languages = ['en']
    else:
        wanted_languages = [list(candidates.keys())[0]]

    format_query = params.get('subtitlesformat', 'best')
    preferred_exts = format_query.split('/') if format_query else []

    selected = {}
    for language in wanted_languages:
        tracks = candidates.get(language)
        if tracks is None:
            self.report_warning('%s subtitles not available for %s' % (language, video_id))
            continue
        for wanted_ext in preferred_exts:
            if wanted_ext == 'best':
                choice = tracks[-1]
                break
            same_ext = [track for track in tracks if track['ext'] == wanted_ext]
            if same_ext:
                choice = same_ext[-1]
                break
        else:
            # Nothing in the preference list matched: use the last format
            choice = tracks[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (format_query, language, choice['ext']))
        selected[language] = choice
    return selected
1380
8222d8de
JMF
1381 def process_info(self, info_dict):
1382 """Process a single resolved IE result."""
1383
1384 assert info_dict.get('_type', 'video') == 'video'
fd288278
PH
1385
1386 max_downloads = self.params.get('max_downloads')
1387 if max_downloads is not None:
1388 if self._num_downloads >= int(max_downloads):
1389 raise MaxDownloadsReached()
8222d8de
JMF
1390
1391 info_dict['fulltitle'] = info_dict['title']
1392 if len(info_dict['title']) > 200:
6febd1c1 1393 info_dict['title'] = info_dict['title'][:197] + '...'
8222d8de 1394
11b85ce6 1395 if 'format' not in info_dict:
8222d8de
JMF
1396 info_dict['format'] = info_dict['ext']
1397
442c37b7 1398 reason = self._match_entry(info_dict, incomplete=False)
8222d8de 1399 if reason is not None:
6febd1c1 1400 self.to_screen('[download] ' + reason)
8222d8de
JMF
1401 return
1402
fd288278 1403 self._num_downloads += 1
8222d8de 1404
e72c7e41 1405 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
8222d8de
JMF
1406
1407 # Forced printings
1408 if self.params.get('forcetitle', False):
0783b09b 1409 self.to_stdout(info_dict['fulltitle'])
8222d8de 1410 if self.params.get('forceid', False):
0783b09b 1411 self.to_stdout(info_dict['id'])
8222d8de 1412 if self.params.get('forceurl', False):
16ae61f6 1413 if info_dict.get('requested_formats') is not None:
1414 for f in info_dict['requested_formats']:
1415 self.to_stdout(f['url'] + f.get('play_path', ''))
1416 else:
1417 # For RTMP URLs, also include the playpath
1418 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
216d71d0 1419 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
0783b09b 1420 self.to_stdout(info_dict['thumbnail'])
216d71d0 1421 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
0783b09b 1422 self.to_stdout(info_dict['description'])
8222d8de 1423 if self.params.get('forcefilename', False) and filename is not None:
0783b09b 1424 self.to_stdout(filename)
525ef922
PH
1425 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1426 self.to_stdout(formatSeconds(info_dict['duration']))
8222d8de 1427 if self.params.get('forceformat', False):
0783b09b 1428 self.to_stdout(info_dict['format'])
9d153818 1429 if self.params.get('forcejson', False):
0783b09b 1430 self.to_stdout(json.dumps(info_dict))
8222d8de
JMF
1431
1432 # Do nothing else if in simulate mode
1433 if self.params.get('simulate', False):
1434 return
1435
1436 if filename is None:
1437 return
1438
1439 try:
e5a11a22 1440 dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
d26e981d 1441 if dn and not os.path.exists(dn):
8222d8de
JMF
1442 os.makedirs(dn)
1443 except (OSError, IOError) as err:
6febd1c1 1444 self.report_error('unable to create directory ' + compat_str(err))
8222d8de
JMF
1445 return
1446
1447 if self.params.get('writedescription', False):
2699da80 1448 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
7b6fefc9 1449 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
6febd1c1 1450 self.to_screen('[info] Video description is already present')
f00fd51d
JMF
1451 elif info_dict.get('description') is None:
1452 self.report_warning('There\'s no description to write.')
7b6fefc9
PH
1453 else:
1454 try:
6febd1c1 1455 self.to_screen('[info] Writing video description to: ' + descfn)
7b6fefc9
PH
1456 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1457 descfile.write(info_dict['description'])
7b6fefc9 1458 except (OSError, IOError):
6febd1c1 1459 self.report_error('Cannot write description file ' + descfn)
7b6fefc9 1460 return
8222d8de 1461
1fb07d10 1462 if self.params.get('writeannotations', False):
98727e12 1463 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
7b6fefc9 1464 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
6febd1c1 1465 self.to_screen('[info] Video annotations are already present')
7b6fefc9
PH
1466 else:
1467 try:
6febd1c1 1468 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
1469 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1470 annofile.write(info_dict['annotations'])
1471 except (KeyError, TypeError):
6febd1c1 1472 self.report_warning('There are no annotations to write.')
7b6fefc9 1473 except (OSError, IOError):
6febd1c1 1474 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 1475 return
1fb07d10 1476
c4a91be7 1477 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 1478 self.params.get('writeautomaticsub')])
c4a91be7 1479
c84dd8a9 1480 if subtitles_are_requested and info_dict.get('requested_subtitles'):
8222d8de
JMF
1481 # subtitles download errors are already managed as troubles in relevant IE
1482 # that way it will silently go on when used with unsupporting IE
c84dd8a9 1483 subtitles = info_dict['requested_subtitles']
0f2c0d33 1484 ie = self.get_info_extractor(info_dict['extractor_key'])
a504ced0
JMF
1485 for sub_lang, sub_info in subtitles.items():
1486 sub_format = sub_info['ext']
1487 if sub_info.get('data') is not None:
1488 sub_data = sub_info['data']
1489 else:
1490 try:
0f2c0d33
JMF
1491 sub_data = ie._download_webpage(
1492 sub_info['url'], info_dict['id'], note=False)
1493 except ExtractorError as err:
a504ced0 1494 self.report_warning('Unable to download subtitle for "%s": %s' %
0f2c0d33 1495 (sub_lang, compat_str(err.cause)))
a504ced0 1496 continue
8222d8de 1497 try:
d4051a8e 1498 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
7b6fefc9 1499 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
6febd1c1 1500 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
7b6fefc9 1501 else:
6febd1c1 1502 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
7b6fefc9 1503 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
a504ced0 1504 subfile.write(sub_data)
8222d8de 1505 except (OSError, IOError):
e4db1951 1506 self.report_error('Cannot write subtitles file ' + sub_filename)
8222d8de
JMF
1507 return
1508
8222d8de 1509 if self.params.get('writeinfojson', False):
b29e0000 1510 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
7b6fefc9 1511 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
6febd1c1 1512 self.to_screen('[info] Video description metadata is already present')
7b6fefc9 1513 else:
6febd1c1 1514 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
7b6fefc9 1515 try:
cb202fd2 1516 write_json_file(self.filter_requested_info(info_dict), infofn)
7b6fefc9 1517 except (OSError, IOError):
6febd1c1 1518 self.report_error('Cannot write metadata to JSON file ' + infofn)
7b6fefc9 1519 return
8222d8de 1520
ec82d85a 1521 self._write_thumbnails(info_dict, filename)
8222d8de
JMF
1522
1523 if not self.params.get('skip_download', False):
4340deca
P
1524 try:
1525 def dl(name, info):
a055469f 1526 fd = get_suitable_downloader(info, self.params)(self, self.params)
4340deca
P
1527 for ph in self._progress_hooks:
1528 fd.add_progress_hook(ph)
1529 if self.params.get('verbose'):
1530 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1531 return fd.download(name, info)
ee69b99a 1532
4340deca
P
1533 if info_dict.get('requested_formats') is not None:
1534 downloaded = []
1535 success = True
d47aeb22 1536 merger = FFmpegMergerPP(self)
f740fae2 1537 if not merger.available:
4340deca
P
1538 postprocessors = []
1539 self.report_warning('You have requested multiple '
1540 'formats but ffmpeg or avconv are not installed.'
4a5a898a 1541 ' The formats won\'t be merged.')
6350728b 1542 else:
4340deca 1543 postprocessors = [merger]
81cd954a
S
1544
1545 def compatible_formats(formats):
1546 video, audio = formats
1547 # Check extension
1548 video_ext, audio_ext = audio.get('ext'), video.get('ext')
1549 if video_ext and audio_ext:
1550 COMPATIBLE_EXTS = (
6728187a 1551 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
81cd954a
S
1552 ('webm')
1553 )
1554 for exts in COMPATIBLE_EXTS:
1555 if video_ext in exts and audio_ext in exts:
1556 return True
1557 # TODO: Check acodec/vcodec
1558 return False
1559
38c6902b
S
1560 filename_real_ext = os.path.splitext(filename)[1][1:]
1561 filename_wo_ext = (
1562 os.path.splitext(filename)[0]
1563 if filename_real_ext == info_dict['ext']
1564 else filename)
81cd954a 1565 requested_formats = info_dict['requested_formats']
c0dea0a7 1566 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
38c6902b 1567 info_dict['ext'] = 'mkv'
4a5a898a
S
1568 self.report_warning(
1569 'Requested formats are incompatible for merge and will be merged into mkv.')
38c6902b
S
1570 # Ensure filename always has a correct extension for successful merge
1571 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
5b5fbc08
JMF
1572 if os.path.exists(encodeFilename(filename)):
1573 self.to_screen(
1574 '[download] %s has already been downloaded and '
1575 'merged' % filename)
1576 else:
81cd954a 1577 for f in requested_formats:
5b5fbc08
JMF
1578 new_info = dict(info_dict)
1579 new_info.update(f)
1580 fname = self.prepare_filename(new_info)
666a9a2b 1581 fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
5b5fbc08
JMF
1582 downloaded.append(fname)
1583 partial_success = dl(fname, new_info)
1584 success = success and partial_success
1585 info_dict['__postprocessors'] = postprocessors
1586 info_dict['__files_to_merge'] = downloaded
4340deca
P
1587 else:
1588 # Just a single file
1589 success = dl(filename, info_dict)
1590 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1591 self.report_error('unable to download video data: %s' % str(err))
1592 return
1593 except (OSError, IOError) as err:
1594 raise UnavailableVideoError(err)
1595 except (ContentTooShortError, ) as err:
1596 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1597 return
8222d8de
JMF
1598
1599 if success:
6271f1ca 1600 # Fixup content
62cd676c
PH
1601 fixup_policy = self.params.get('fixup')
1602 if fixup_policy is None:
1603 fixup_policy = 'detect_or_warn'
1604
6271f1ca
PH
1605 stretched_ratio = info_dict.get('stretched_ratio')
1606 if stretched_ratio is not None and stretched_ratio != 1:
6271f1ca
PH
1607 if fixup_policy == 'warn':
1608 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1609 info_dict['id'], stretched_ratio))
1610 elif fixup_policy == 'detect_or_warn':
1611 stretched_pp = FFmpegFixupStretchedPP(self)
1612 if stretched_pp.available:
1613 info_dict.setdefault('__postprocessors', [])
1614 info_dict['__postprocessors'].append(stretched_pp)
1615 else:
1616 self.report_warning(
1617 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1618 info_dict['id'], stretched_ratio))
1619 else:
62cd676c
PH
1620 assert fixup_policy in ('ignore', 'never')
1621
1622 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1623 if fixup_policy == 'warn':
1624 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1625 info_dict['id']))
1626 elif fixup_policy == 'detect_or_warn':
1627 fixup_pp = FFmpegFixupM4aPP(self)
1628 if fixup_pp.available:
1629 info_dict.setdefault('__postprocessors', [])
1630 info_dict['__postprocessors'].append(fixup_pp)
1631 else:
1632 self.report_warning(
1633 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1634 info_dict['id']))
1635 else:
1636 assert fixup_policy in ('ignore', 'never')
6271f1ca 1637
8222d8de
JMF
1638 try:
1639 self.post_process(filename, info_dict)
1640 except (PostProcessingError) as err:
6febd1c1 1641 self.report_error('postprocessing: %s' % str(err))
8222d8de 1642 return
cd58dc3e 1643 self.record_download_archive(info_dict)
8222d8de
JMF
1644
def download(self, url_list):
    """Download every URL in url_list and return self._download_retcode.

    Raises SameFileError before doing any work when more than one URL is
    given, the output template contains no '%' field and the
    'max_downloads' param is not 1.  MaxDownloadsReached is announced on
    screen and re-raised; UnavailableVideoError is only reported.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    several_urls = len(url_list) > 1
    if several_urls and '%' not in outtmpl and self.params.get('max_downloads') != 1:
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # extract_info also downloads the videos
            force_generic = self.params.get('force_generic_extractor', False)
            res = self.extract_info(url, force_generic_extractor=force_generic)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        else:
            if self.params.get('dump_single_json', False):
                self.to_stdout(json.dumps(res))

    return self._download_retcode
1668
def download_with_info_file(self, info_filename):
    """Process the info dict stored as JSON in info_filename.

    The file is read as UTF-8, passed through filter_requested_info and
    handed to process_ie_result with download=True.  If that raises
    DownloadError and the info has a 'webpage_url', a warning is issued
    and the result of download([webpage_url]) is returned instead;
    without a 'webpage_url' the error is re-raised.  Otherwise returns
    self._download_retcode.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, so json.load can't be used
        raw_json = '\n'.join(lines)
        info = self.filter_requested_info(json.loads(raw_json))

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 1685
cb202fd2
S
@staticmethod
def filter_requested_info(info_dict):
    """Return a new dict with info_dict's items, minus the
    'requested_formats' and 'requested_subtitles' keys."""
    excluded = ['requested_formats', 'requested_subtitles']
    kept = []
    for key, value in info_dict.items():
        if key in excluded:
            continue
        kept.append((key, value))
    return dict(kept)
1691
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The chain is ie_info['__postprocessors'] (when present) followed by
    self._pps.  Each postprocessor's run() receives the info dict returned
    by the previous one and returns (files_to_delete, info).  A
    PostProcessingError is reported and the chain continues.  Files listed
    for deletion are removed unless the 'keepvideo' param is set.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    per_video_pps = ie_info.get('__postprocessors')
    chain = [] if per_video_pps is None else list(per_video_pps)
    chain.extend(self._pps)

    for pp in chain:
        obsolete_files = []
        try:
            obsolete_files, info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)

        if not obsolete_files or self.params.get('keepvideo', False):
            continue
        for old_filename in obsolete_files:
            self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
            try:
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
c1c9a79c 1713
5db07df6
PH
1714 def _make_archive_id(self, info_dict):
1715 # Future-proof against any change in case
1716 # and backwards compatibility with prior versions
d31209a1 1717 extractor = info_dict.get('extractor_key')
7012b23c
PH
1718 if extractor is None:
1719 if 'id' in info_dict:
1720 extractor = info_dict.get('ie_key') # key in a playlist
1721 if extractor is None:
5db07df6 1722 return None # Incomplete video information
6febd1c1 1723 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
1724
def in_download_archive(self, info_dict):
    """Return True if info_dict's archive id is a line of the file named
    by the 'download_archive' param.

    Returns False when no archive is configured, when no archive id can
    be built, or when the archive file does not exist; any other IOError
    is propagated.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    found = False
    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            found = any(entry.strip() == vid_id for entry in archive_file)
    except IOError as ioe:
        # A missing archive file simply means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return found
1743
def record_download_archive(self, info_dict):
    """Append info_dict's archive id as a new line to the file named by
    the 'download_archive' param; do nothing when that param is unset."""
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        vid_id = self._make_archive_id(info_dict)
        assert vid_id
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')
dd82ffea 1752
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short human-readable resolution string for a format dict.

    Precedence:
      * 'audio only' when the format's 'vcodec' is 'none';
      * the format's own 'resolution' value when it is set;
      * 'WIDTHxHEIGHT' when both dimensions are known;
      * 'HEIGHTp' when only the height is known;
      * 'WIDTHx?' when only the width is known;
      * default otherwise.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        # Strings are WIDTHxHEIGHT, so the unknown height goes on the right
        # (previously rendered as '?xWIDTH', which reads as a height).
        return '%dx?' % width
    return default
1769
c57f7757
PH
1770 def _format_note(self, fdict):
1771 res = ''
1772 if fdict.get('ext') in ['f4f', 'f4m']:
1773 res += '(unsupported) '
1774 if fdict.get('format_note') is not None:
1775 res += fdict['format_note'] + ' '
1776 if fdict.get('tbr') is not None:
1777 res += '%4dk ' % fdict['tbr']
1778 if fdict.get('container') is not None:
1779 if res:
1780 res += ', '
1781 res += '%s container' % fdict['container']
1782 if (fdict.get('vcodec') is not None and
1783 fdict.get('vcodec') != 'none'):
1784 if res:
1785 res += ', '
1786 res += fdict['vcodec']
91c7271a 1787 if fdict.get('vbr') is not None:
c57f7757
PH
1788 res += '@'
1789 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1790 res += 'video@'
1791 if fdict.get('vbr') is not None:
1792 res += '%4dk' % fdict['vbr']
fbb21cf5
PH
1793 if fdict.get('fps') is not None:
1794 res += ', %sfps' % fdict['fps']
c57f7757
PH
1795 if fdict.get('acodec') is not None:
1796 if res:
1797 res += ', '
1798 if fdict['acodec'] == 'none':
1799 res += 'video only'
1800 else:
1801 res += '%-5s' % fdict['acodec']
1802 elif fdict.get('abr') is not None:
1803 if res:
1804 res += ', '
1805 res += 'audio'
1806 if fdict.get('abr') is not None:
1807 res += '@%3dk' % fdict['abr']
1808 if fdict.get('asr') is not None:
1809 res += ' (%5dHz)' % fdict['asr']
1810 if fdict.get('filesize') is not None:
1811 if res:
1812 res += ', '
1813 res += format_bytes(fdict['filesize'])
9732d77e
PH
1814 elif fdict.get('filesize_approx') is not None:
1815 if res:
1816 res += ', '
1817 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 1818 return res
91c7271a 1819
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    Formats are taken from info_dict['formats'], falling back to
    info_dict itself when that key is absent.  Entries whose 'preference'
    is below -1000 are left out of the table.  When more than one format
    exists, the last listed row gets a '(best)' marker in its note.
    """
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    # The preference filter may have removed every row; only tag a row
    # that actually exists instead of failing with IndexError.
    if len(formats) > 1 and table:
        last_note = table[-1][-1]
        table[-1][-1] = last_note + (' ' if last_note else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
cfb56d1a
PH
1833
def list_thumbnails(self, info_dict):
    """Print a table (ID, width, height, URL) of info_dict's thumbnails.

    Uses info_dict['thumbnails']; when that is empty or missing, a single
    entry with id '0' is built from info_dict['thumbnail'].  If neither
    is available, a 'No thumbnails present' line is printed instead.
    """
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        tn_url = info_dict.get('thumbnail')
        if not tn_url:
            self.to_screen(
                '[info] No thumbnails present for %s' % info_dict['id'])
            return
        thumbnails = [{'id': '0', 'url': tn_url}]

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    rows = []
    for t in thumbnails:
        rows.append(
            [t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']])
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
dca08720 1850
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of languages and their formats for video_id.

    subtitles maps a language to a list of format dicts; the 'ext' values
    are listed in reverse order of that list.  name is the label used in
    the printed messages.  When subtitles is empty, a '<video_id> has no
    <name>' line is printed instead.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen(
        'Available %s for %s:' % (name, video_id))
    rows = []
    for lang, formats in subtitles.items():
        exts = [f['ext'] for f in reversed(formats)]
        rows.append([lang, ', '.join(exts)])
    self.to_screen(render_table(['Language', 'formats'], rows))
a504ced0 1861
dca08720
PH
def urlopen(self, req):
    """ Start an HTTP download

    Opens req through self._opener using self._socket_timeout as the
    timeout and returns whatever the opener returns.
    """
    opener = self._opener
    return opener.open(req, timeout=self._socket_timeout)
dca08720
PH
1865
def print_debug_header(self):
    """Write the '[debug] ...' banner when the 'verbose' param is set.

    Emits, in order: the encodings in use, the youtube-dl version, the
    git HEAD found from this module's directory (best effort), the Python
    version and platform, the versions of external executables, and the
    proxy map collected from the opener's handlers.  With the 'call_home'
    param it also fetches the public IP address and the latest version
    from yt-dl.org and warns when the running version is older.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    # sys.stdout may be an object that has no 'encoding' attribute
    missing_label = 'missing (%s)' % type(sys.stdout).__name__
    stdout_encoding = getattr(sys.stdout, 'encoding', missing_label)
    encodings = (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        stdout_encoding,
        self.get_encoding(),
    )
    write_string(
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % encodings,
        encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        git = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=module_dir)
        out, err = git.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Best effort only: any failure here just omits the Git HEAD line
        try:
            sys.exc_clear()
        except Exception:
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    known = [
        '%s %s' % (exe, version)
        for exe, version in sorted(exe_versions.items())
        if version]
    exe_str = ', '.join(known) or 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if not self.params.get('call_home', False):
        return
    ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
    self._write_string('[debug] Public IP address: %s\n' % ipaddr)
    latest_version = self.urlopen(
        'https://yt-dl.org/latest/version').read().decode('utf-8')
    if version_tuple(latest_version) > version_tuple(__version__):
        self.report_warning(
            'You are using an outdated version (newest version: %s)! '
            'See https://yt-dl.org/update if you need help updating.' %
            latest_version)
1930
def _setup_opener(self):
    """Create the URL opener used for all requests and store it in
    self._opener.

    Also sets self._socket_timeout (the 'socket_timeout' param as a
    float, or 600 when unset) and self.cookiejar (a MozillaCookieJar for
    the 'cookiefile' param, loaded if readable; a plain CookieJar
    otherwise).  The 'proxy' param overrides the proxies reported by
    getproxies(); an empty string disables proxies.
    """
    timeout_val = self.params.get('socket_timeout')
    if timeout_val is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
1971
def encode(self, s):
    """Return s encoded with self.get_encoding(); bytes pass through
    unchanged.

    On UnicodeEncodeError a hint about the encoding configuration is
    appended to the error's reason before it is re-raised.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded
1981
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is
    unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a
PH
1987
1988 def _write_thumbnails(self, info_dict, filename):
1989 if self.params.get('writethumbnail', False):
1990 thumbnails = info_dict.get('thumbnails')
1991 if thumbnails:
1992 thumbnails = [thumbnails[-1]]
1993 elif self.params.get('write_all_thumbnails', False):
1994 thumbnails = info_dict.get('thumbnails')
1995 else:
1996 return
1997
1998 if not thumbnails:
1999 # No thumbnails present, so return immediately
2000 return
2001
2002 for t in thumbnails:
2003 thumb_ext = determine_ext(t['url'], 'jpg')
2004 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2005 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
82245a6d 2006 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
ec82d85a
PH
2007
2008 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2009 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2010 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2011 else:
2012 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2013 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2014 try:
2015 uf = self.urlopen(t['url'])
d3d89c32 2016 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a
PH
2017 shutil.copyfileobj(uf, thumbf)
2018 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2019 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2020 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2021 self.report_warning('Unable to download thumbnail "%s": %s' %
2022 (t['url'], compat_str(err)))