]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
[youtube] Set 'is_live'
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
9d2ecdbc 8import datetime
c1c9a79c 9import errno
31bd3925 10import fileinput
8222d8de 11import io
b82f815f 12import itertools
8694c600 13import json
62fec3b2 14import locale
083c9df9 15import operator
8222d8de 16import os
dca08720 17import platform
8222d8de
JMF
18import re
19import shutil
dca08720 20import subprocess
8222d8de
JMF
21import socket
22import sys
23import time
24import traceback
25
1e5b9a95
PH
26if os.name == 'nt':
27 import ctypes
28
8c25f81b 29from .compat import (
8f9312c3 30 compat_basestring,
dca08720 31 compat_cookiejar,
4644ac55 32 compat_expanduser,
003c69a8 33 compat_get_terminal_size,
ce02ed60 34 compat_http_client,
4f026faf 35 compat_kwargs,
ce02ed60
PH
36 compat_str,
37 compat_urllib_error,
38 compat_urllib_request,
8c25f81b
PH
39)
40from .utils import (
d05cfe06 41 escape_url,
ce02ed60
PH
42 ContentTooShortError,
43 date_from_str,
44 DateRange,
acd69589 45 DEFAULT_OUTTMPL,
ce02ed60
PH
46 determine_ext,
47 DownloadError,
48 encodeFilename,
49 ExtractorError,
02dbf93f 50 format_bytes,
525ef922 51 formatSeconds,
931bc3c3 52 HEADRequest,
ce02ed60 53 locked_file,
dca08720 54 make_HTTPS_handler,
ce02ed60 55 MaxDownloadsReached,
b7ab0590 56 PagedList,
083c9df9 57 parse_filesize,
91410c9b 58 PerRequestProxyHandler,
ce02ed60 59 PostProcessingError,
dca08720 60 platform_name,
ce02ed60 61 preferredencoding,
cfb56d1a 62 render_table,
ce02ed60
PH
63 SameFileError,
64 sanitize_filename,
1bb5c511 65 sanitize_path,
e5660ee6 66 std_headers,
ce02ed60 67 subtitles_filename,
ce02ed60 68 UnavailableVideoError,
29eb5174 69 url_basename,
58b1f00d 70 version_tuple,
ce02ed60
PH
71 write_json_file,
72 write_string,
dca08720 73 YoutubeDLHandler,
6350728b 74 prepend_extension,
b29e0000 75 replace_extension,
7d4111ed 76 args_to_str,
05900629 77 age_restricted,
ce02ed60 78)
a0e07d31 79from .cache import Cache
023fa8c4 80from .extractor import get_info_extractor, gen_extractors
3bc2ddcc 81from .downloader import get_suitable_downloader
4c83c967 82from .downloader.rtmp import rtmpdump_version
4f026faf 83from .postprocessor import (
62cd676c 84 FFmpegFixupM4aPP,
6271f1ca 85 FFmpegFixupStretchedPP,
4f026faf
PH
86 FFmpegMergerPP,
87 FFmpegPostProcessor,
88 get_postprocessor,
89)
dca08720 90from .version import __version__
8222d8de
JMF
91
92
93class YoutubeDL(object):
94 """YoutubeDL class.
95
96 YoutubeDL objects are the ones responsible for downloading the
97 actual video file and writing it to disk if the user has requested
98 it, among some other tasks. In most cases there should be one per
99 program. As, given a video URL, the downloader doesn't know how to
100 extract all the needed information, task that InfoExtractors do, it
101 has to pass the URL to one of them.
102
103 For this, YoutubeDL objects have a method that allows
104 InfoExtractors to be registered in a given order. When it is passed
105 a URL, the YoutubeDL object hands it to the first InfoExtractor it
106 finds that reports being able to handle it. The InfoExtractor extracts
107 all the information about the video or videos the URL refers to, and
108 YoutubeDL processes the extracted information, possibly using a File
109 Downloader to download the video.
110
111 YoutubeDL objects accept a lot of parameters. In order not to saturate
112 the object constructor with arguments, it receives a dictionary of
113 options instead. These options are available through the params
114 attribute for the InfoExtractors to use. The YoutubeDL also
115 registers itself as the downloader in charge for the InfoExtractors
116 that are added to it, so this is a "mutual registration".
117
118 Available options:
119
120 username: Username for authentication purposes.
121 password: Password for authentication purposes.
180940e0 122 videopassword: Password for accessing a video.
8222d8de
JMF
123 usenetrc: Use netrc for authentication instead.
124 verbose: Print additional info to stdout.
125 quiet: Do not print messages to stdout.
ad8915b7 126 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
127 forceurl: Force printing final URL.
128 forcetitle: Force printing title.
129 forceid: Force printing ID.
130 forcethumbnail: Force printing thumbnail URL.
131 forcedescription: Force printing description.
132 forcefilename: Force printing final filename.
525ef922 133 forceduration: Force printing duration.
8694c600 134 forcejson: Force printing info_dict as JSON.
63e0be34
PH
135 dump_single_json: Force printing the info_dict of the whole playlist
136 (or video) as a single JSON line.
8222d8de 137 simulate: Do not download the video files.
d8600787 138 format: Video format code. See options.py for more information.
8222d8de
JMF
139 outtmpl: Template for output names.
140 restrictfilenames: Do not allow "&" and spaces in file names
141 ignoreerrors: Do not stop on download errors.
d22dec74 142 force_generic_extractor: Force downloader to use the generic extractor
8222d8de
JMF
143 nooverwrites: Prevent overwriting files.
144 playliststart: Playlist item to start at.
145 playlistend: Playlist item to end at.
c14e88f0 146 playlist_items: Specific indices of playlist to download.
ff815fe6 147 playlistreverse: Download playlist items in reverse order.
8222d8de
JMF
148 matchtitle: Download only matching titles.
149 rejecttitle: Reject downloads for matching titles.
8bf9319e 150 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
151 logtostderr: Log messages to stderr instead of stdout.
152 writedescription: Write the video description to a .description file
153 writeinfojson: Write the video metadata to a .info.json file
1fb07d10 154 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 155 writethumbnail: Write the thumbnail image to a file
ec82d85a 156 write_all_thumbnails: Write all thumbnail formats to files
8222d8de 157 writesubtitles: Write the video subtitles to a file
b004821f 158 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 159 allsubtitles: Downloads all the subtitles of the video
0b7f3118 160 (requires writesubtitles or writeautomaticsub)
8222d8de 161 listsubtitles: Lists all available subtitles for the video
a504ced0 162 subtitlesformat: The format code for subtitles
aa6a10c4 163 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
164 keepvideo: Keep the video file after post-processing
165 daterange: A DateRange object, download only if the upload_date is in the range.
166 skip_download: Skip the actual download of the video file
c35f9e72 167 cachedir: Location of the cache files in the filesystem.
a0e07d31 168 False to disable filesystem cache.
47192f92 169 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
170 age_limit: An integer representing the user's age in years.
171 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
172 min_views: An integer representing the minimum view count the video
173 must have in order to not be skipped.
174 Videos without view count information are always
175 downloaded. None for no limit.
176 max_views: An integer representing the maximum view count.
177 Videos that are more popular than that are not
178 downloaded.
179 Videos without view count information are always
180 downloaded. None for no limit.
181 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
182 Videos already present in the file are not downloaded
183 again.
dca08720 184 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 185 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
186 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
187 At the moment, this is only supported by YouTube.
a1ee09e8 188 proxy: URL of the proxy server to use
91410c9b
PH
189 cn_verification_proxy: URL of the proxy to use for IP address verification
190 on Chinese sites. (Experimental)
e344693b 191 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
192 bidi_workaround: Work around buggy terminals without bidirectional text
193 support, using fribidi
a0ddb8a2 194 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 195 include_ads: Download ads as well
04b4d394
PH
196 default_search: Prepend this string if an input url is not valid.
197 'auto' for elaborate guessing
62fec3b2 198 encoding: Use this encoding instead of the system-specified.
e8ee972c 199 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
200 Pass in 'in_playlist' to only show this behavior for
201 playlist items.
4f026faf 202 postprocessors: A list of dictionaries, each with an entry
71b640cc
PH
203 * key: The name of the postprocessor. See
204 youtube_dl/postprocessor/__init__.py for a list.
4f026faf
PH
205 as well as any further keyword arguments for the
206 postprocessor.
71b640cc
PH
207 progress_hooks: A list of functions that get called on download
208 progress, with a dictionary with the entries
5cda4eda 209 * status: One of "downloading", "error", or "finished".
ee69b99a 210 Check this first and ignore unknown values.
71b640cc 211
5cda4eda 212 If status is one of "downloading", or "finished", the
ee69b99a
PH
213 following properties may also be present:
214 * filename: The final filename (always present)
5cda4eda 215 * tmpfilename: The filename we're currently writing to
71b640cc
PH
216 * downloaded_bytes: Bytes on disk
217 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
218 * total_bytes_estimate: Guess of the eventual file size,
219 None if unavailable.
220 * elapsed: The number of seconds since download started.
71b640cc
PH
221 * eta: The estimated time in seconds, None if unknown
222 * speed: The download speed in bytes/second, None if
223 unknown
5cda4eda
PH
224 * fragment_index: The counter of the currently
225 downloaded video fragment.
226 * fragment_count: The number of fragments (= individual
227 files that will be merged)
71b640cc
PH
228
229 Progress hooks are guaranteed to be called at least once
230 (with status "finished") if the download is successful.
45598f15 231 merge_output_format: Extension to use when merging formats.
6271f1ca
PH
232 fixup: Automatically correct known faults of the file.
233 One of:
234 - "never": do nothing
235 - "warn": only emit a warning
236 - "detect_or_warn": check whether we can do anything
62cd676c 237 about it, warn otherwise (default)
be4a824d 238 source_address: (Experimental) Client-side IP address to bind to.
8bfa7545
PH
239 call_home: Boolean, true iff we are allowed to contact the
240 youtube-dl servers for debugging.
5f0d813d 241 sleep_interval: Number of seconds to sleep before each download.
cfb56d1a
PH
242 listformats: Print an overview of available video formats and exit.
243 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
244 match_filter: A function that gets called with the info_dict of
245 every video.
246 If it returns a message, the video is ignored.
247 If it returns None, the video is downloaded.
248 match_filter_func in utils.py is one example for this.
7e5db8c9 249 no_color: Do not emit color codes in output.
71b640cc 250
85729c51
PH
251 The following options determine which downloader is picked:
252 external_downloader: Executable of the external downloader to call.
253 None or unset for standard (built-in) downloader.
254 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
fe7e0c98 255
8222d8de 256 The following parameters are not used by YoutubeDL itself, they are used by
c75f0b36 257 the downloader (see youtube_dl/downloader/common.py):
8222d8de 258 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 259 noresizebuffer, retries, continuedl, noprogress, consoletitle,
c75f0b36 260 xattr_set_filesize, external_downloader_args.
76b1bd67
JMF
261
262 The following options are used by the post processors:
263 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
264 otherwise prefer avconv.
f72b0a60
S
265 postprocessor_args: A list of additional command-line arguments for the
266 postprocessor.
8222d8de
JMF
267 """
268
269 params = None
270 _ies = []
271 _pps = []
272 _download_retcode = None
273 _num_downloads = None
274 _screen_file = None
275
def __init__(self, params=None, auto_init=True):
    """Set up a downloader from an options dictionary.

    params is the options dictionary (a fresh empty dict when omitted);
    it is stored as self.params and may be modified here
    ('restrictfilenames' is forced on for ASCII-only file systems).
    When auto_init is true, the debug header is printed and the default
    InfoExtractors are registered.  Post processors and progress hooks
    listed in the options are always registered.
    """
    params = {} if params is None else params

    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # 'logtostderr' selects stderr (index 1) over stdout (index 0)
    self._screen_file = (sys.stdout, sys.stderr)[params.get('logtostderr', False)]
    self._err_file = sys.stderr
    self.params = params
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            width_args = [] if width is None else ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Try bidiv first and fall back to fribidi if it cannot be started
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs)
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # Only a missing executable is tolerated; anything else propagates
            if ose.errno != errno.ENOENT:
                raise
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ('ascii', 'ANSI_X3.4-1968') and
            not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if isinstance(params.get('outtmpl'), bytes):
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    # Each entry names a post processor under 'key'; the remaining items
    # are passed to its constructor as keyword arguments.
    for pp_def_raw in self.params.get('postprocessors', []):
        pp_factory = get_postprocessor(pp_def_raw['key'])
        pp_options = dict(pp_def_raw)
        del pp_options['key']
        self.add_post_processor(pp_factory(self, **compat_kwargs(pp_options)))

    for hook in self.params.get('progress_hooks', []):
        self.add_progress_hook(hook)
348
7d4111ed
PH
349 def warn_if_short_id(self, argv):
350 # short YouTube ID starting with dash?
351 idxs = [
352 i for i, a in enumerate(argv)
353 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
354 if idxs:
355 correct_argv = (
356 ['youtube-dl'] +
357 [a for i, a in enumerate(argv) if i not in idxs] +
358 ['--'] + [argv[i] for i in idxs]
359 )
360 self.report_warning(
361 'Long argument string detected. '
362 'Use -- to separate parameters and URLs, like this:\n%s\n' %
363 args_to_str(correct_argv))
364
8222d8de
JMF
365 def add_info_extractor(self, ie):
366 """Add an InfoExtractor object to the end of the list."""
367 self._ies.append(ie)
56c73665 368 self._ies_instances[ie.ie_key()] = ie
8222d8de
JMF
369 ie.set_downloader(self)
370
56c73665
JMF
371 def get_info_extractor(self, ie_key):
372 """
373 Get an instance of an IE with name ie_key, it will try to get one from
374 the _ies list, if there's no instance it will create a new one and add
375 it to the extractor list.
376 """
377 ie = self._ies_instances.get(ie_key)
378 if ie is None:
379 ie = get_info_extractor(ie_key)()
380 self.add_info_extractor(ie)
381 return ie
382
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors, in the
    order they are produced.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
389
8222d8de
JMF
390 def add_post_processor(self, pp):
391 """Add a PostProcessor object to the end of the chain."""
392 self._pps.append(pp)
393 pp.set_downloader(self)
394
933605d7
JMF
395 def add_progress_hook(self, ph):
396 """Add the progress hook (currently only for the file downloader)"""
397 self._progress_hooks.append(ph)
8ab470f1 398
1c088fa8 399 def _bidi_workaround(self, message):
5d681e96 400 if not hasattr(self, '_output_channel'):
1c088fa8
PH
401 return message
402
5d681e96 403 assert hasattr(self, '_output_process')
11b85ce6 404 assert isinstance(message, compat_str)
6febd1c1
PH
405 line_count = message.count('\n') + 1
406 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 407 self._output_process.stdin.flush()
6febd1c1 408 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 409 for _ in range(line_count))
6febd1c1 410 return res[:-len('\n')]
1c088fa8 411
8222d8de 412 def to_screen(self, message, skip_eol=False):
0783b09b
PH
413 """Print message to stdout if not in quiet mode."""
414 return self.to_stdout(message, skip_eol, check_quiet=True)
415
def _write_string(self, s, out=None):
    """Write s to out via write_string, using the 'encoding' option."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
734f90bb 418
0783b09b 419 def to_stdout(self, message, skip_eol=False, check_quiet=False):
8222d8de 420 """Print message to stdout if not in quiet mode."""
8bf9319e 421 if self.params.get('logger'):
43afe285 422 self.params['logger'].debug(message)
0783b09b 423 elif not check_quiet or not self.params.get('quiet', False):
1c088fa8 424 message = self._bidi_workaround(message)
6febd1c1 425 terminator = ['\n', ''][skip_eol]
8222d8de 426 output = message + terminator
1c088fa8 427
734f90bb 428 self._write_string(output, self._screen_file)
8222d8de
JMF
429
def to_stderr(self, message):
    """Print message to the error file, or pass it to the logger's error()."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    converted = self._bidi_workaround(message)
    self._write_string(converted + '\n', self._err_file)
8222d8de 439
1e5b9a95
PH
440 def to_console_title(self, message):
441 if not self.params.get('consoletitle', False):
442 return
443 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
444 # c_wchar_p() might not be necessary if `message` is
445 # already of type unicode()
446 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
447 elif 'TERM' in os.environ:
734f90bb 448 self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 449
bdde425c
PH
450 def save_console_title(self):
451 if not self.params.get('consoletitle', False):
452 return
453 if 'TERM' in os.environ:
efd6c574 454 # Save the title on stack
734f90bb 455 self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
456
457 def restore_console_title(self):
458 if not self.params.get('consoletitle', False):
459 return
460 if 'TERM' in os.environ:
efd6c574 461 # Restore the title from stack
734f90bb 462 self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
463
464 def __enter__(self):
465 self.save_console_title()
466 return self
467
468 def __exit__(self, *args):
469 self.restore_console_title()
f89197d7 470
dca08720
PH
471 if self.params.get('cookiefile') is not None:
472 self.cookiejar.save()
bdde425c 473
8222d8de
JMF
474 def trouble(self, message=None, tb=None):
475 """Determine action to take when a download problem appears.
476
477 Depending on if the downloader has been configured to ignore
478 download errors or not, this method may throw an exception or
479 not when errors are found, after printing the message.
480
481 tb, if given, is additional traceback information.
482 """
483 if message is not None:
484 self.to_stderr(message)
485 if self.params.get('verbose'):
486 if tb is None:
487 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 488 tb = ''
8222d8de 489 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 490 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
8222d8de
JMF
491 tb += compat_str(traceback.format_exc())
492 else:
493 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 494 tb = ''.join(tb_data)
8222d8de
JMF
495 self.to_stderr(tb)
496 if not self.params.get('ignoreerrors', False):
497 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
498 exc_info = sys.exc_info()[1].exc_info
499 else:
500 exc_info = sys.exc_info()
501 raise DownloadError(message, exc_info)
502 self._download_retcode = 1
503
504 def report_warning(self, message):
505 '''
506 Print the message to stderr, it will be prefixed with 'WARNING:'
507 If stderr is a tty file the 'WARNING:' will be colored
508 '''
6d07ce01
JMF
509 if self.params.get('logger') is not None:
510 self.params['logger'].warning(message)
8222d8de 511 else:
ad8915b7
PH
512 if self.params.get('no_warnings'):
513 return
7e5db8c9 514 if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
6d07ce01
JMF
515 _msg_header = '\033[0;33mWARNING:\033[0m'
516 else:
517 _msg_header = 'WARNING:'
518 warning_message = '%s %s' % (_msg_header, message)
519 self.to_stderr(warning_message)
8222d8de
JMF
520
521 def report_error(self, message, tb=None):
522 '''
523 Do the same as trouble, but prefixes the message with 'ERROR:', colored
524 in red if stderr is a tty file.
525 '''
7e5db8c9 526 if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
6febd1c1 527 _msg_header = '\033[0;31mERROR:\033[0m'
8222d8de 528 else:
6febd1c1
PH
529 _msg_header = 'ERROR:'
530 error_message = '%s %s' % (_msg_header, message)
8222d8de
JMF
531 self.trouble(error_message, tb)
532
8222d8de
JMF
533 def report_file_already_downloaded(self, file_name):
534 """Report file has already been fully downloaded."""
535 try:
6febd1c1 536 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 537 except UnicodeEncodeError:
6febd1c1 538 self.to_screen('[download] The file has already been downloaded')
8222d8de 539
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Generate the output filename.

    Expands the 'outtmpl' option (DEFAULT_OUTTMPL when unset) with the
    fields of info_dict plus 'epoch', 'autonumber' and, when it can be
    derived from width/height, 'resolution'.  Values are passed through
    sanitize_filename; fields that are missing or None expand to 'NA'.

    Returns the filename.  If the expansion raises ValueError the error
    is reported via report_error and, unless that raises, None is
    returned.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad the index to the width of the playlist length
            template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
        if template_dict.get('resolution') is None:
            width = template_dict.get('width')
            height = template_dict.get('height')
            if width and height:
                template_dict['resolution'] = '%dx%d' % (width, height)
            elif height:
                template_dict['resolution'] = '%sp' % height
            elif width:
                # Resolution reads WIDTHxHEIGHT, so the unknown height is
                # the part after the 'x' (previously rendered as '?x%d').
                template_dict['resolution'] = '%dx?' % width

        def sanitize(key, value):
            # The 'id' field is sanitized less aggressively (is_id=True)
            return sanitize_filename(
                compat_str(value),
                restricted=self.params.get('restrictfilenames'),
                is_id=(key == 'id'))

        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None)
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
        tmpl = compat_expanduser(outtmpl)
        filename = tmpl % template_dict
        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
582
442c37b7 583 def _match_entry(self, info_dict, incomplete):
8222d8de
JMF
584 """ Returns None iff the file should be downloaded """
585
6febd1c1 586 video_title = info_dict.get('title', info_dict.get('id', 'video'))
7012b23c
PH
587 if 'title' in info_dict:
588 # This can happen when we're just evaluating the playlist
589 title = info_dict['title']
590 matchtitle = self.params.get('matchtitle', False)
591 if matchtitle:
592 if not re.search(matchtitle, title, re.IGNORECASE):
6febd1c1 593 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
7012b23c
PH
594 rejecttitle = self.params.get('rejecttitle', False)
595 if rejecttitle:
596 if re.search(rejecttitle, title, re.IGNORECASE):
6febd1c1 597 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
8222d8de
JMF
598 date = info_dict.get('upload_date', None)
599 if date is not None:
600 dateRange = self.params.get('daterange', DateRange())
601 if date not in dateRange:
6febd1c1 602 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
5fe18bdb
PH
603 view_count = info_dict.get('view_count', None)
604 if view_count is not None:
605 min_views = self.params.get('min_views')
606 if min_views is not None and view_count < min_views:
6febd1c1 607 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
5fe18bdb
PH
608 max_views = self.params.get('max_views')
609 if max_views is not None and view_count > max_views:
6febd1c1 610 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
05900629 611 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
347de493 612 return 'Skipping "%s" because it is age restricted' % video_title
c1c9a79c 613 if self.in_download_archive(info_dict):
6febd1c1 614 return '%s has already been recorded in archive' % video_title
347de493 615
442c37b7
PH
616 if not incomplete:
617 match_filter = self.params.get('match_filter')
618 if match_filter is not None:
619 ret = match_filter(info_dict)
620 if ret is not None:
621 return ret
347de493 622
8222d8de 623 return None
fe7e0c98 624
b6c45014
JMF
625 @staticmethod
626 def add_extra_info(info_dict, extra_info):
627 '''Set the keys from extra_info in info dict if they are missing'''
628 for key, value in extra_info.items():
629 info_dict.setdefault(key, value)
630
7fc3fa05 631 def extract_info(self, url, download=True, ie_key=None, extra_info={},
61aa5ba3 632 process=True, force_generic_extractor=False):
8222d8de
JMF
633 '''
634 Returns a list with a dictionary for each video we find.
635 If 'download', also downloads the videos.
636 extra_info is a dict containing the extra values to add to each result
613b2d9d 637 '''
fe7e0c98 638
61aa5ba3 639 if not ie_key and force_generic_extractor:
d22dec74
S
640 ie_key = 'Generic'
641
8222d8de 642 if ie_key:
56c73665 643 ies = [self.get_info_extractor(ie_key)]
8222d8de
JMF
644 else:
645 ies = self._ies
646
647 for ie in ies:
648 if not ie.suitable(url):
649 continue
650
651 if not ie.working():
6febd1c1
PH
652 self.report_warning('The program functionality for this site has been marked as broken, '
653 'and will probably not work.')
8222d8de
JMF
654
655 try:
656 ie_result = ie.extract(url)
5f6a1245 657 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
8222d8de
JMF
658 break
659 if isinstance(ie_result, list):
660 # Backwards compatibility: old IE result format
8222d8de
JMF
661 ie_result = {
662 '_type': 'compat_list',
663 'entries': ie_result,
664 }
ea38e55f 665 self.add_default_extra_info(ie_result, ie, url)
7fc3fa05
PH
666 if process:
667 return self.process_ie_result(ie_result, download, extra_info)
668 else:
669 return ie_result
5f6a1245 670 except ExtractorError as de: # An error we somewhat expected
8222d8de
JMF
671 self.report_error(compat_str(de), de.format_traceback())
672 break
d3e5bbf4
PH
673 except MaxDownloadsReached:
674 raise
8222d8de
JMF
675 except Exception as e:
676 if self.params.get('ignoreerrors', False):
677 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
678 break
679 else:
680 raise
681 else:
1a489545 682 self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 683
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in extractor and web page fields that ie_result does not have yet."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
691
8222d8de
JMF
692 def process_ie_result(self, ie_result, download=True, extra_info={}):
693 """
694 Take the result of the ie(may be modified) and resolve all unresolved
695 references (URLs, playlist items).
696
697 It will also download the videos if 'download'.
698 Returns the resolved ie_result.
699 """
700
e8ee972c
PH
701 result_type = ie_result.get('_type', 'video')
702
057a5206
PH
703 if result_type in ('url', 'url_transparent'):
704 extract_flat = self.params.get('extract_flat', False)
705 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
706 extract_flat is True):
057a5206
PH
707 if self.params.get('forcejson', False):
708 self.to_stdout(json.dumps(ie_result))
e8ee972c
PH
709 return ie_result
710
8222d8de 711 if result_type == 'video':
b6c45014 712 self.add_extra_info(ie_result, extra_info)
feee2ecf 713 return self.process_video_result(ie_result, download=download)
8222d8de
JMF
714 elif result_type == 'url':
715 # We have to add extra_info to the results because it may be
716 # contained in a playlist
717 return self.extract_info(ie_result['url'],
718 download,
719 ie_key=ie_result.get('ie_key'),
720 extra_info=extra_info)
7fc3fa05
PH
721 elif result_type == 'url_transparent':
722 # Use the information from the embedding page
723 info = self.extract_info(
724 ie_result['url'], ie_key=ie_result.get('ie_key'),
725 extra_info=extra_info, download=False, process=False)
726
412c617d
PH
727 force_properties = dict(
728 (k, v) for k, v in ie_result.items() if v is not None)
729 for f in ('_type', 'url'):
730 if f in force_properties:
731 del force_properties[f]
732 new_result = info.copy()
733 new_result.update(force_properties)
7fc3fa05
PH
734
735 assert new_result.get('_type') != 'url_transparent'
7fc3fa05
PH
736
737 return self.process_ie_result(
738 new_result, download=download, extra_info=extra_info)
42e12102 739 elif result_type == 'playlist' or result_type == 'multi_video':
8222d8de
JMF
740 # We process each entry in the playlist
741 playlist = ie_result.get('title', None) or ie_result.get('id', None)
6febd1c1 742 self.to_screen('[download] Downloading playlist: %s' % playlist)
8222d8de
JMF
743
744 playlist_results = []
745
8222d8de 746 playliststart = self.params.get('playliststart', 1) - 1
a19fd00c
PH
747 playlistend = self.params.get('playlistend', None)
748 # For backwards compatibility, interpret -1 as whole list
8222d8de 749 if playlistend == -1:
a19fd00c 750 playlistend = None
8222d8de 751
c14e88f0
PH
752 playlistitems_str = self.params.get('playlist_items', None)
753 playlistitems = None
754 if playlistitems_str is not None:
755 def iter_playlistitems(format):
756 for string_segment in format.split(','):
757 if '-' in string_segment:
758 start, end = string_segment.split('-')
759 for item in range(int(start), int(end) + 1):
760 yield int(item)
761 else:
762 yield int(string_segment)
763 playlistitems = iter_playlistitems(playlistitems_str)
764
b82f815f
PH
765 ie_entries = ie_result['entries']
766 if isinstance(ie_entries, list):
767 n_all_entries = len(ie_entries)
c14e88f0 768 if playlistitems:
3884dcf3
JMF
769 entries = [
770 ie_entries[i - 1] for i in playlistitems
771 if -n_all_entries <= i - 1 < n_all_entries]
c14e88f0
PH
772 else:
773 entries = ie_entries[playliststart:playlistend]
b7ab0590
PH
774 n_entries = len(entries)
775 self.to_screen(
776 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
777 (ie_result['extractor'], playlist, n_all_entries, n_entries))
b82f815f 778 elif isinstance(ie_entries, PagedList):
c14e88f0
PH
779 if playlistitems:
780 entries = []
781 for item in playlistitems:
782 entries.extend(ie_entries.getslice(
783 item - 1, item
784 ))
785 else:
786 entries = ie_entries.getslice(
787 playliststart, playlistend)
b7ab0590
PH
788 n_entries = len(entries)
789 self.to_screen(
790 "[%s] playlist %s: Downloading %d videos" %
791 (ie_result['extractor'], playlist, n_entries))
b82f815f 792 else: # iterable
c14e88f0
PH
793 if playlistitems:
794 entry_list = list(ie_entries)
795 entries = [entry_list[i - 1] for i in playlistitems]
796 else:
797 entries = list(itertools.islice(
798 ie_entries, playliststart, playlistend))
b82f815f
PH
799 n_entries = len(entries)
800 self.to_screen(
801 "[%s] playlist %s: Downloading %d videos" %
802 (ie_result['extractor'], playlist, n_entries))
8222d8de 803
ff815fe6
MS
804 if self.params.get('playlistreverse', False):
805 entries = entries[::-1]
806
fe7e0c98 807 for i, entry in enumerate(entries, 1):
734ea11e 808 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
8222d8de 809 extra = {
c6b4132a 810 'n_entries': n_entries,
fe7e0c98 811 'playlist': playlist,
a1cf99d0
PH
812 'playlist_id': ie_result.get('id'),
813 'playlist_title': ie_result.get('title'),
fe7e0c98 814 'playlist_index': i + playliststart,
b6c45014 815 'extractor': ie_result['extractor'],
9103bbc5 816 'webpage_url': ie_result['webpage_url'],
29eb5174 817 'webpage_url_basename': url_basename(ie_result['webpage_url']),
be97abc2 818 'extractor_key': ie_result['extractor_key'],
fe7e0c98 819 }
7012b23c 820
442c37b7 821 reason = self._match_entry(entry, incomplete=True)
7012b23c 822 if reason is not None:
6febd1c1 823 self.to_screen('[download] ' + reason)
7012b23c
PH
824 continue
825
8222d8de
JMF
826 entry_result = self.process_ie_result(entry,
827 download=download,
828 extra_info=extra)
829 playlist_results.append(entry_result)
830 ie_result['entries'] = playlist_results
831 return ie_result
832 elif result_type == 'compat_list':
c9bf4114
PH
833 self.report_warning(
834 'Extractor %s returned a compat_list result. '
835 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 836
8222d8de 837 def _fixup(r):
9e1a5b84
JW
838 self.add_extra_info(
839 r,
9103bbc5
JMF
840 {
841 'extractor': ie_result['extractor'],
842 'webpage_url': ie_result['webpage_url'],
29eb5174 843 'webpage_url_basename': url_basename(ie_result['webpage_url']),
be97abc2 844 'extractor_key': ie_result['extractor_key'],
9e1a5b84
JW
845 }
846 )
8222d8de
JMF
847 return r
848 ie_result['entries'] = [
b6c45014 849 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
850 for r in ie_result['entries']
851 ]
852 return ie_result
853 else:
854 raise Exception('Invalid result type: %s' % result_type)
855
083c9df9
PH
856 def _apply_format_filter(self, format_spec, available_formats):
857 " Returns a tuple of the remaining format_spec and filtered formats "
858
859 OPERATORS = {
860 '<': operator.lt,
861 '<=': operator.le,
862 '>': operator.gt,
863 '>=': operator.ge,
864 '=': operator.eq,
865 '!=': operator.ne,
866 }
867 operator_rex = re.compile(r'''(?x)\s*\[
2ec19e95 868 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
083c9df9
PH
869 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
870 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
871 \]$
872 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
873 m = operator_rex.search(format_spec)
9ddb6925
S
874 if m:
875 try:
876 comparison_value = int(m.group('value'))
877 except ValueError:
878 comparison_value = parse_filesize(m.group('value'))
879 if comparison_value is None:
880 comparison_value = parse_filesize(m.group('value') + 'B')
881 if comparison_value is None:
882 raise ValueError(
883 'Invalid value %r in format specification %r' % (
884 m.group('value'), format_spec))
885 op = OPERATORS[m.group('op')]
886
083c9df9 887 if not m:
9ddb6925
S
888 STR_OPERATORS = {
889 '=': operator.eq,
890 '!=': operator.ne,
891 }
892 str_operator_rex = re.compile(r'''(?x)\s*\[
893 \s*(?P<key>ext|acodec|vcodec|container|protocol)
894 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
895 \s*(?P<value>[a-zA-Z0-9_-]+)
896 \s*\]$
897 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
898 m = str_operator_rex.search(format_spec)
899 if m:
900 comparison_value = m.group('value')
901 op = STR_OPERATORS[m.group('op')]
083c9df9 902
9ddb6925
S
903 if not m:
904 raise ValueError('Invalid format specification %r' % format_spec)
083c9df9
PH
905
906 def _filter(f):
907 actual_value = f.get(m.group('key'))
908 if actual_value is None:
909 return m.group('none_inclusive')
910 return op(actual_value, comparison_value)
911 new_formats = [f for f in available_formats if _filter(f)]
912
913 new_format_spec = format_spec[:-len(m.group(0))]
914 if not new_format_spec:
915 new_format_spec = 'best'
916
917 return (new_format_spec, new_formats)
918
def select_format(self, format_spec, available_formats):
    """Pick one format out of available_formats according to format_spec.

    Trailing ``[...]`` filters are applied first (through
    ``_apply_format_filter``). The remaining spec is one of 'best',
    'worst', 'bestaudio', 'worstaudio', 'bestvideo', 'worstvideo', a known
    file extension, or a format_id. The list is indexed from the front for
    "worst" and from the back for "best".

    Returns the chosen format dict, or None when nothing matches.
    """
    while format_spec.endswith(']'):
        format_spec, available_formats = self._apply_format_filter(
            format_spec, available_formats)
    if not available_formats:
        return None

    if format_spec in ('best', 'worst', None):
        position = 0 if format_spec == 'worst' else -1
        with_both_streams = [
            fmt for fmt in available_formats
            if fmt.get('vcodec') != 'none' and fmt.get('acodec') != 'none']
        if with_both_streams:
            return with_both_streams[position]
        # No format carries audio and video together. If every format has
        # audio, or every format has video (audio-only / video-only URLs),
        # fall back to the whole list.
        every_has_audio = all(
            fmt.get('acodec') != 'none' for fmt in available_formats)
        if every_has_audio or all(
                fmt.get('vcodec') != 'none' for fmt in available_formats):
            return available_formats[position]
        return None

    # spec -> (codec field that must be 'none', index to pick)
    single_stream_specs = {
        'bestaudio': ('vcodec', -1),
        'worstaudio': ('vcodec', 0),
        'bestvideo': ('acodec', -1),
        'worstvideo': ('acodec', 0),
    }
    if format_spec in single_stream_specs:
        absent_codec, position = single_stream_specs[format_spec]
        candidates = [
            fmt for fmt in available_formats
            if fmt.get(absent_codec) == 'none']
        if candidates:
            return candidates[position]
        return None

    known_extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
    # A known extension selects by 'ext'; anything else is a format_id
    lookup_field = 'ext' if format_spec in known_extensions else 'format_id'
    hits = [fmt for fmt in available_formats if fmt[lookup_field] == format_spec]
    if hits:
        return hits[-1]
    return None
971
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Build the HTTP headers to associate with info_dict.

    Starts from a copy of std_headers, overlays info_dict['http_headers']
    when present, and sets 'Cookie' when _calc_cookies returns a value.
    """
    headers = std_headers.copy()
    headers.update(info_dict.get('http_headers') or {})

    cookie_value = self._calc_cookies(info_dict)
    if cookie_value:
        headers['Cookie'] = cookie_value

    return headers
984
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header self.cookiejar adds for info_dict['url'].

    A throwaway request object is built only so the cookie jar can attach
    its header to it; the request itself is never sent here.
    """
    probe_request = compat_urllib_request.Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    cookie_header = probe_request.get_header('Cookie')
    return cookie_header
e5660ee6 989
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Normalize a single video result, select its formats and process them.

    Fills in derived fields of info_dict in place (playlist defaults,
    thumbnails, display_id, upload_date, requested_subtitles, per-format
    format_id/format/ext/http_headers), chooses the formats matching the
    'format' parameter and, if ``download`` is true, hands each of them to
    process_info.

    Returns info_dict updated with the last selected format. Returns None
    when one of the listing parameters ('listsubtitles', 'listformats',
    'list_thumbnails') is set, or after reporting that a merged request
    has its audio-only format first.
    Raises ExtractorError for missing 'id'/'title'/'url', an empty format
    list, or when no requested format is available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        # Missing fields are None, and Python 3 refuses to order None
        # against numbers or strings. Pairing every value with an
        # "is present" flag sorts missing values first (the ordering
        # Python 2 produced) without ever comparing None to a real value.
        def _thumbnail_sort_key(t):
            return tuple(
                (t.get(field) is not None, t.get(field))
                for field in ('preference', 'width', 'height', 'id', 'url'))

        thumbnails.sort(key=_thumbnail_sort_key)
        for i, t in enumerate(thumbnails):
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                t['id'] = '%d' % i

    if thumbnails and 'thumbnail' not in info_dict:
        # The list is sorted ascending, so the last entry is the preferred one
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
        self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
        return
    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], info_dict.get('subtitles'),
        info_dict.get('automatic_captions'))

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if 'url' not in fmt:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        format_id = fmt['format_id']
        if format_id not in formats_dict:
            formats_dict[format_id] = []
        formats_dict[format_id].append(fmt)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, fmt in enumerate(ambiguous_formats):
                fmt['format_id'] = '%s-%d' % (format_id, i)

    for i, fmt in enumerate(formats):
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # Only set the 'formats' field if the original info_dict lists them;
        # otherwise we end up with a circular reference (the first and only
        # element of 'formats' would be info_dict itself), which can't be
        # exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        # No explicit request: use 'best', preceded by a merged
        # 'bestvideo+bestaudio' for the extractors listed below when the
        # merger is usable and output is not going to stdout ('-')
        req_format_list = []
        if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                info_dict['extractor'] in ['youtube', 'ted']):
            merger = FFmpegMergerPP(self)
            if merger.available and merger.can_merge():
                req_format_list.append('bestvideo+bestaudio')
        req_format_list.append('best')
        req_format = '/'.join(req_format_list)
    formats_to_download = []
    if req_format == 'all':
        formats_to_download = formats
    else:
        for rfstr in req_format.split(','):
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = rfstr.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'
                    format_1, format_2 = rf.split('+')
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                        # The first format must contain the video and the
                        # second the audio
                        if formats_info[0].get('vcodec') == 'none':
                            self.report_error('The first format must '
                                              'contain the video, try using '
                                              '"-f %s+%s"' % (format_2, format_1))
                            return
                        output_ext = (
                            formats_info[0]['ext']
                            if self.params.get('merge_output_format') is None
                            else self.params['merge_output_format'])
                        # Video properties come from the first format,
                        # audio properties from the second
                        selected_format = {
                            'requested_formats': formats_info,
                            'format': '%s+%s' % (formats_info[0].get('format'),
                                                 formats_info[1].get('format')),
                            'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                    formats_info[1].get('format_id')),
                            'width': formats_info[0].get('width'),
                            'height': formats_info[0].get('height'),
                            'resolution': formats_info[0].get('resolution'),
                            'fps': formats_info[0].get('fps'),
                            'vcodec': formats_info[0].get('vcodec'),
                            'vbr': formats_info[0].get('vbr'),
                            'stretched_ratio': formats_info[0].get('stretched_ratio'),
                            'acodec': formats_info[1].get('acodec'),
                            'abr': formats_info[1].get('abr'),
                            'ext': output_ext,
                        }
                    else:
                        selected_format = None
                else:
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download.append(selected_format)
                    break
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
1177
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Choose which subtitle tracks to write, and in which format.

    Regular subtitles are considered when 'writesubtitles' is set and
    automatic captions when 'writeautomaticsub' is set; a regular track
    wins over an automatic one for the same language. Languages come from
    'allsubtitles' / 'subtitleslangs', defaulting to 'en' or else the
    first available language. The format follows the '/'-separated
    'subtitlesformat' preference, falling back to the last listed track.

    Returns a dict mapping language to the chosen track, or None when
    nothing was requested or nothing is available.
    """
    want_normal = self.params.get('writesubtitles')
    want_automatic = self.params.get('writeautomaticsub')

    candidates = {}
    if normal_subtitles and want_normal:
        candidates.update(normal_subtitles)
    if automatic_captions and want_automatic:
        for lang, captions in automatic_captions.items():
            # Never override a regular subtitle with an automatic caption
            candidates.setdefault(lang, captions)

    if not (want_normal or want_automatic) or not candidates:
        return None

    if self.params.get('allsubtitles', False):
        wanted_langs = candidates.keys()
    elif self.params.get('subtitleslangs', False):
        wanted_langs = self.params.get('subtitleslangs')
    elif 'en' in candidates:
        wanted_langs = ['en']
    else:
        wanted_langs = [list(candidates.keys())[0]]

    format_query = self.params.get('subtitlesformat', 'best')
    if format_query:
        preferred_exts = format_query.split('/')
    else:
        preferred_exts = []

    selected = {}
    for lang in wanted_langs:
        tracks = candidates.get(lang)
        if tracks is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for wanted_ext in preferred_exts:
            if wanted_ext == 'best':
                choice = tracks[-1]
                break
            same_ext = [track for track in tracks if track['ext'] == wanted_ext]
            if same_ext:
                choice = same_ext[-1]
                break
        else:
            # None of the preferred formats exists for this language
            choice = tracks[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (format_query, lang, choice['ext']))
        selected[lang] = choice
    return selected
1226
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    In order: enforces 'max_downloads', truncates over-long titles, applies
    the _match_entry filter, computes the output filename, prints the
    "force*" fields, and stops there in simulate mode. Otherwise it writes
    the requested side files (description, annotations, subtitles, info
    JSON, thumbnails), downloads the media (each of 'requested_formats'
    separately when present), queues fix-up/merge postprocessors, runs the
    postprocessors and records the download in the archive.

    Returns None. Raises MaxDownloadsReached when the download limit is
    already met and UnavailableVideoError on OSError/IOError while
    downloading; most other failures are reported through report_error
    and end the processing of this entry.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # Keep the complete title around; the (possibly shortened) 'title' is
    # limited to 200 characters
    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict, incomplete=False)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    info_dict['_filename'] = filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        if info_dict.get('requested_formats') is not None:
            for f in info_dict['requested_formats']:
                self.to_stdout(f['url'] + f.get('play_path', ''))
        else:
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    # Make sure the target directory exists before anything is written
    try:
        dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = replace_extension(filename, 'description', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                # Missing or non-text 'annotations' entry
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        ie = self.get_info_extractor(info_dict['extractor_key'])
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            if sub_info.get('data') is not None:
                sub_data = sub_info['data']
            else:
                try:
                    sub_data = ie._download_webpage(
                        sub_info['url'], info_dict['id'], note=False)
                except ExtractorError as err:
                    self.report_warning('Unable to download subtitle for "%s": %s' %
                                        (sub_lang, compat_str(err.cause)))
                    continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub_data)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.filter_requested_info(info_dict), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    self._write_thumbnails(info_dict, filename)

    if not self.params.get('skip_download', False):
        try:
            def dl(name, info):
                # Instantiate the downloader chosen for this info dict,
                # attach the registered progress hooks and run it
                fd = get_suitable_downloader(info, self.params)(self, self.params)
                for ph in self._progress_hooks:
                    fd.add_progress_hook(ph)
                if self.params.get('verbose'):
                    self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                return fd.download(name, info)

            if info_dict.get('requested_formats') is not None:
                downloaded = []
                success = True
                merger = FFmpegMergerPP(self)
                if not merger.available:
                    postprocessors = []
                    self.report_warning('You have requested multiple '
                                        'formats but ffmpeg or avconv are not installed.'
                                        ' The formats won\'t be merged.')
                else:
                    postprocessors = [merger]

                def compatible_formats(formats):
                    # True when both extensions belong to the same group
                    # of COMPATIBLE_EXTS
                    video, audio = formats
                    # Check extension
                    video_ext, audio_ext = video.get('ext'), audio.get('ext')
                    if video_ext and audio_ext:
                        # Every group must be a real tuple: a bare
                        # ('webm') is just the string 'webm', and "in"
                        # on a string is a substring test
                        COMPATIBLE_EXTS = (
                            ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
                            ('webm',),
                        )
                        for exts in COMPATIBLE_EXTS:
                            if video_ext in exts and audio_ext in exts:
                                return True
                    # TODO: Check acodec/vcodec
                    return False

                filename_real_ext = os.path.splitext(filename)[1][1:]
                filename_wo_ext = (
                    os.path.splitext(filename)[0]
                    if filename_real_ext == info_dict['ext']
                    else filename)
                requested_formats = info_dict['requested_formats']
                if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                    info_dict['ext'] = 'mkv'
                    self.report_warning(
                        'Requested formats are incompatible for merge and will be merged into mkv.')
                # Ensure filename always has a correct extension for successful merge
                filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                if os.path.exists(encodeFilename(filename)):
                    self.to_screen(
                        '[download] %s has already been downloaded and '
                        'merged' % filename)
                else:
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = self.prepare_filename(new_info)
                        # Each part gets its own file name, tagged with its format_id
                        fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
            else:
                # Just a single file
                success = dl(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success:
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id'], stretched_ratio))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
                if fixup_policy == 'warn':
                    self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id']))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return
            self.record_download_archive(info_dict)
8222d8de
JMF
1490
def download(self, url_list):
    """Download every URL in url_list and return the accumulated return code.

    Raises SameFileError when several URLs would be written to one fixed
    output name (no '%' in the output template) unless 'max_downloads'
    is 1. MaxDownloadsReached is announced and re-raised.
    """
    template = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if len(url_list) > 1:
        if '%' not in template and self.params.get('max_downloads') != 1:
            raise SameFileError(template)

    for url in url_list:
        force_generic = self.params.get('force_generic_extractor', False)
        try:
            # Extraction also performs the actual download
            result = self.extract_info(
                url, force_generic_extractor=force_generic)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
1514
def download_with_info_file(self, info_filename):
    """Process a result previously saved as JSON in info_filename.

    The file is read as UTF-8, stripped of the 'requested_*' keys via
    filter_requested_info and passed to process_ie_result. If that raises
    DownloadError and the info has a 'webpage_url', the URL is downloaded
    from scratch instead; without one the error is re-raised.

    Returns the download return code.
    """
    source = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(source) as lines:
        # FileInput has no read() method, so json.load cannot be used on it
        raw_json = '\n'.join(lines)
        info = self.filter_requested_info(json.loads(raw_json))

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode
1dcc4c0c 1531
cb202fd2
S
1532 @staticmethod
def filter_requested_info(info_dict):
    """Return a new dict with every entry of info_dict except the
    'requested_formats' and 'requested_subtitles' keys.

    The copy is shallow and info_dict itself is not modified.
    """
    dropped = ('requested_formats', 'requested_subtitles')
    kept = {}
    for key, value in info_dict.items():
        if key in dropped:
            continue
        kept[key] = value
    return kept
1537
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The postprocessors listed under ie_info['__postprocessors'] (if any)
    run first, followed by those in self._pps.  Each one receives the info
    dict returned by the previous one; the first receives a shallow copy of
    ie_info with 'filepath' set to filename.

    Files a postprocessor reports as obsolete are removed unless the
    'keepvideo' parameter is set.  A PostProcessingError from one
    postprocessor is reported and the chain continues.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    per_video_pps = ie_info.get('__postprocessors')
    chain = [] if per_video_pps is None else list(per_video_pps)
    chain.extend(self._pps)

    for pp in chain:
        obsolete_files = []
        try:
            obsolete_files, info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)

        if not obsolete_files or self.params.get('keepvideo', False):
            continue

        for old_filename in obsolete_files:
            self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
            try:
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
c1c9a79c 1559
5db07df6
PH
1560 def _make_archive_id(self, info_dict):
1561 # Future-proof against any change in case
1562 # and backwards compatibility with prior versions
d31209a1 1563 extractor = info_dict.get('extractor_key')
7012b23c
PH
1564 if extractor is None:
1565 if 'id' in info_dict:
1566 extractor = info_dict.get('ie_key') # key in a playlist
1567 if extractor is None:
5db07df6 1568 return None # Incomplete video information
6febd1c1 1569 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
1570
def in_download_archive(self, info_dict):
    """Tell whether the video described by info_dict is already listed in
    the download archive file.

    Returns False when no 'download_archive' parameter is configured, when
    no archive id can be built for info_dict, or when the archive file does
    not exist yet.  Any IOError other than ENOENT is propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted_id = self._make_archive_id(info_dict)
    if wanted_id is None:
        # Incomplete video information
        return False

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            return any(entry.strip() == wanted_id for entry in archive_file)
    except IOError as ioe:
        # A missing archive simply means nothing has been recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
1589
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the download archive file.

    Does nothing when no 'download_archive' parameter is configured.
    The archive id must be available (asserted).
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + '\n')
dd82ffea 1598
8c51aa65 1599 @staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    Precedence:
      * 'audio only' when vcodec is 'none';
      * the explicit 'resolution' value when present;
      * '<width>x<height>' when both dimensions are known;
      * '<height>p' when only the height is known;
      * '<width>x?' when only the width is known;
      * `default` otherwise.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        # The known value is the width, so it belongs on the left of the
        # 'x'; the unknown height is shown as '?'.  %s (as in the other
        # branches) also tolerates non-integer width values.
        return '%sx?' % width
    return default
1615
c57f7757
PH
1616 def _format_note(self, fdict):
1617 res = ''
1618 if fdict.get('ext') in ['f4f', 'f4m']:
1619 res += '(unsupported) '
1620 if fdict.get('format_note') is not None:
1621 res += fdict['format_note'] + ' '
1622 if fdict.get('tbr') is not None:
1623 res += '%4dk ' % fdict['tbr']
1624 if fdict.get('container') is not None:
1625 if res:
1626 res += ', '
1627 res += '%s container' % fdict['container']
1628 if (fdict.get('vcodec') is not None and
1629 fdict.get('vcodec') != 'none'):
1630 if res:
1631 res += ', '
1632 res += fdict['vcodec']
91c7271a 1633 if fdict.get('vbr') is not None:
c57f7757
PH
1634 res += '@'
1635 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1636 res += 'video@'
1637 if fdict.get('vbr') is not None:
1638 res += '%4dk' % fdict['vbr']
fbb21cf5
PH
1639 if fdict.get('fps') is not None:
1640 res += ', %sfps' % fdict['fps']
c57f7757
PH
1641 if fdict.get('acodec') is not None:
1642 if res:
1643 res += ', '
1644 if fdict['acodec'] == 'none':
1645 res += 'video only'
1646 else:
1647 res += '%-5s' % fdict['acodec']
1648 elif fdict.get('abr') is not None:
1649 if res:
1650 res += ', '
1651 res += 'audio'
1652 if fdict.get('abr') is not None:
1653 res += '@%3dk' % fdict['abr']
1654 if fdict.get('asr') is not None:
1655 res += ' (%5dHz)' % fdict['asr']
1656 if fdict.get('filesize') is not None:
1657 if res:
1658 res += ', '
1659 res += format_bytes(fdict['filesize'])
9732d77e
PH
1660 elif fdict.get('filesize_approx') is not None:
1661 if res:
1662 res += ', '
1663 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 1664 return res
91c7271a 1665
def list_formats(self, info_dict):
    """Print a table of the available formats for a video.

    Uses info_dict['formats'], or info_dict itself as the single format
    when that key is absent.  Formats whose 'preference' is below -1000
    are left out of the table.  When the video has more than one format,
    the last listed row is tagged '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    # `table` can be empty even with several formats when every one of
    # them falls below the preference threshold; there is then no row to
    # tag and indexing table[-1] would raise IndexError.
    if len(formats) > 1 and table:
        table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
cfb56d1a
PH
1679
def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails known for a video.

    Uses info_dict['thumbnails']; when that is missing or empty, the
    single 'thumbnail' URL (if any) is listed with id '0'.  With no
    thumbnail at all, only a notice is printed.
    """
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        single_url = info_dict.get('thumbnail')
        if not single_url:
            self.to_screen(
                '[info] No thumbnails present for %s' % info_dict['id'])
            return
        thumbnails = [{'id': '0', 'url': single_url}]

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])

    rows = []
    for thumb in thumbnails:
        rows.append([
            thumb['id'],
            thumb.get('width', 'unknown'),
            thumb.get('height', 'unknown'),
            thumb['url'],
        ])
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
dca08720 1696
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of subtitle languages and their available formats.

    `subtitles` maps a language to a list of dicts carrying an 'ext' key;
    the extensions are listed in reverse order of that list.  `name` is
    the label used in the printed messages.  When `subtitles` is empty,
    only a notice is printed.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen(
        'Available %s for %s:' % (name, video_id))

    rows = []
    for lang, formats in subtitles.items():
        extensions = [f['ext'] for f in reversed(formats)]
        rows.append([lang, ', '.join(extensions)])
    self.to_screen(render_table(['Language', 'formats'], rows))
a504ced0 1707
dca08720
PH
def urlopen(self, req):
    """ Start an HTTP download

    `req` is either a URL string or a request object.  The URL is
    percent-escaped first; the request is then opened through
    self._opener using self._socket_timeout.
    """

    # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
    # always respected by websites, some tend to give out URLs with non percent-encoded
    # non-ASCII characters (see telemb.py, ard.py [#3412])
    # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
    # To work around aforementioned issue we will replace request's original URL with
    # percent-encoded one
    if isinstance(req, compat_basestring):
        escaped = escape_url(req)
        # Substitute URL if any change after escaping
        if escaped != req:
            req = escaped
    else:
        original_url = req.get_full_url()
        escaped = escape_url(original_url)
        # Substitute URL if any change after escaping
        if escaped != original_url:
            # Rebuild the request around the escaped URL, keeping HEAD
            # requests as HEAD requests
            if req.get_method() == 'HEAD':
                request_cls = HEADRequest
            else:
                request_cls = compat_urllib_request.Request
            req = request_cls(
                escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
1732
def print_debug_header(self):
    """Write the '[debug] ...' diagnostic header.

    Does nothing unless the 'verbose' parameter is set.  Otherwise it
    reports the encodings in use, the program version, the git HEAD of the
    source checkout (best effort), the Python version and platform, the
    versions of the external executables and the proxy map of the opener.
    With the 'call_home' parameter it additionally fetches and prints the
    public IP address and warns when a newer version is published.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_type_name = type(sys.stdout).__name__
    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % stdout_type_name)
    encodings = (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        stdout_encoding,
        self.get_encoding(),
    )
    write_string(
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % encodings,
        encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')

    # Best effort only: any failure while querying git is ignored
    try:
        source_dir = os.path.dirname(os.path.abspath(__file__))
        git = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=source_dir)
        raw_out, _ = git.communicate()
        revision = raw_out.decode().strip()
        if re.match('[0-9a-f]+', revision):
            self._write_string('[debug] Git HEAD: ' + revision + '\n')
    except Exception:
        try:
            sys.exc_clear()
        except Exception:
            pass

    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    found = [
        '%s %s' % (exe, version)
        for exe, version in sorted(exe_versions.items())
        if version]
    exe_str = ', '.join(found) or 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if not self.params.get('call_home', False):
        return

    ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
    self._write_string('[debug] Public IP address: %s\n' % ipaddr)
    latest_version = self.urlopen(
        'https://yt-dl.org/latest/version').read().decode('utf-8')
    if version_tuple(latest_version) > version_tuple(__version__):
        self.report_warning(
            'You are using an outdated version (newest version: %s)! '
            'See https://yt-dl.org/update if you need help updating.' %
            latest_version)
1797
def _setup_opener(self):
    """Create the URL opener used for all HTTP(S) requests.

    Sets self._socket_timeout (600 unless the 'socket_timeout' parameter
    is given), self.cookiejar (loaded from the 'cookiefile' parameter when
    that file is readable) and self._opener.  Proxies come from the
    'proxy' parameter ('' disables proxying) or, when it is not given,
    from compat_urllib_request.getproxies().
    """
    configured_timeout = self.params.get('socket_timeout')
    if configured_timeout is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(configured_timeout)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    if self.params.get('debug_printtraffic'):
        debuglevel = 1
    else:
        debuglevel = 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
1838
def encode(self, s):
    """Encode s with the encoding returned by self.get_encoding().

    Byte strings are returned unchanged.  A UnicodeEncodeError is
    re-raised with a hint about the --encoding option appended to its
    reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise
    # Either freshly encoded, or it was already encoded on entry
    return s
1848
def get_encoding(self):
    """Return the 'encoding' parameter, or the result of
    preferredencoding() when that parameter is not set."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a
PH
1854
1855 def _write_thumbnails(self, info_dict, filename):
1856 if self.params.get('writethumbnail', False):
1857 thumbnails = info_dict.get('thumbnails')
1858 if thumbnails:
1859 thumbnails = [thumbnails[-1]]
1860 elif self.params.get('write_all_thumbnails', False):
1861 thumbnails = info_dict.get('thumbnails')
1862 else:
1863 return
1864
1865 if not thumbnails:
1866 # No thumbnails present, so return immediately
1867 return
1868
1869 for t in thumbnails:
1870 thumb_ext = determine_ext(t['url'], 'jpg')
1871 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1872 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
82245a6d 1873 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
ec82d85a
PH
1874
1875 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1876 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1877 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1878 else:
1879 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1880 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1881 try:
1882 uf = self.urlopen(t['url'])
1883 with open(thumb_filename, 'wb') as thumbf:
1884 shutil.copyfileobj(uf, thumbf)
1885 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1886 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1887 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1888 self.report_warning('Unable to download thumbnail "%s": %s' %
1889 (t['url'], compat_str(err)))