]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
Merge pull request #8497 from jaimeMF/lazy-load
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 from .compat import (
28 compat_basestring,
29 compat_cookiejar,
30 compat_expanduser,
31 compat_get_terminal_size,
32 compat_http_client,
33 compat_kwargs,
34 compat_os_name,
35 compat_str,
36 compat_tokenize_tokenize,
37 compat_urllib_error,
38 compat_urllib_request,
39 compat_urllib_request_DataHandler,
40 )
41 from .utils import (
42 age_restricted,
43 args_to_str,
44 ContentTooShortError,
45 date_from_str,
46 DateRange,
47 DEFAULT_OUTTMPL,
48 determine_ext,
49 determine_protocol,
50 DownloadError,
51 encode_compat_str,
52 encodeFilename,
53 error_to_compat_str,
54 ExtractorError,
55 format_bytes,
56 formatSeconds,
57 locked_file,
58 make_HTTPS_handler,
59 MaxDownloadsReached,
60 PagedList,
61 parse_filesize,
62 PerRequestProxyHandler,
63 platform_name,
64 PostProcessingError,
65 preferredencoding,
66 prepend_extension,
67 render_table,
68 replace_extension,
69 SameFileError,
70 sanitize_filename,
71 sanitize_path,
72 sanitize_url,
73 sanitized_Request,
74 std_headers,
75 subtitles_filename,
76 UnavailableVideoError,
77 url_basename,
78 version_tuple,
79 write_json_file,
80 write_string,
81 YoutubeDLCookieProcessor,
82 YoutubeDLHandler,
83 )
84 from .cache import Cache
85 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
86 from .downloader import get_suitable_downloader
87 from .downloader.rtmp import rtmpdump_version
88 from .postprocessor import (
89 FFmpegFixupM3u8PP,
90 FFmpegFixupM4aPP,
91 FFmpegFixupStretchedPP,
92 FFmpegMergerPP,
93 FFmpegPostProcessor,
94 get_postprocessor,
95 )
96 from .version import __version__
97
98 if compat_os_name == 'nt':
99 import ctypes
100
101
102 class YoutubeDL(object):
103 """YoutubeDL class.
104
105 YoutubeDL objects are the ones responsible of downloading the
106 actual video file and writing it to disk if the user has requested
107 it, among some other tasks. In most cases there should be one per
108 program. As, given a video URL, the downloader doesn't know how to
109 extract all the needed information, task that InfoExtractors do, it
110 has to pass the URL to one of them.
111
112 For this, YoutubeDL objects have a method that allows
113 InfoExtractors to be registered in a given order. When it is passed
114 a URL, the YoutubeDL object handles it to the first InfoExtractor it
115 finds that reports being able to handle it. The InfoExtractor extracts
116 all the information about the video or videos the URL refers to, and
117 YoutubeDL process the extracted information, possibly using a File
118 Downloader to download the video.
119
120 YoutubeDL objects accept a lot of parameters. In order not to saturate
121 the object constructor with arguments, it receives a dictionary of
122 options instead. These options are available through the params
123 attribute for the InfoExtractors to use. The YoutubeDL also
124 registers itself as the downloader in charge for the InfoExtractors
125 that are added to it, so this is a "mutual registration".
126
127 Available options:
128
129 username: Username for authentication purposes.
130 password: Password for authentication purposes.
131 videopassword: Password for accessing a video.
132 usenetrc: Use netrc for authentication instead.
133 verbose: Print additional info to stdout.
134 quiet: Do not print messages to stdout.
135 no_warnings: Do not print out anything for warnings.
136 forceurl: Force printing final URL.
137 forcetitle: Force printing title.
138 forceid: Force printing ID.
139 forcethumbnail: Force printing thumbnail URL.
140 forcedescription: Force printing description.
141 forcefilename: Force printing final filename.
142 forceduration: Force printing duration.
143 forcejson: Force printing info_dict as JSON.
144 dump_single_json: Force printing the info_dict of the whole playlist
145 (or video) as a single JSON line.
146 simulate: Do not download the video files.
147 format: Video format code. See options.py for more information.
148 outtmpl: Template for output names.
149 restrictfilenames: Do not allow "&" and spaces in file names
150 ignoreerrors: Do not stop on download errors.
151 force_generic_extractor: Force downloader to use the generic extractor
152 nooverwrites: Prevent overwriting files.
153 playliststart: Playlist item to start at.
154 playlistend: Playlist item to end at.
155 playlist_items: Specific indices of playlist to download.
156 playlistreverse: Download playlist items in reverse order.
157 matchtitle: Download only matching titles.
158 rejecttitle: Reject downloads for matching titles.
159 logger: Log messages to a logging.Logger instance.
160 logtostderr: Log messages to stderr instead of stdout.
161 writedescription: Write the video description to a .description file
162 writeinfojson: Write the video description to a .info.json file
163 writeannotations: Write the video annotations to a .annotations.xml file
164 writethumbnail: Write the thumbnail image to a file
165 write_all_thumbnails: Write all thumbnail formats to files
166 writesubtitles: Write the video subtitles to a file
167 writeautomaticsub: Write the automatically generated subtitles to a file
168 allsubtitles: Downloads all the subtitles of the video
169 (requires writesubtitles or writeautomaticsub)
170 listsubtitles: Lists all available subtitles for the video
171 subtitlesformat: The format code for subtitles
172 subtitleslangs: List of languages of the subtitles to download
173 keepvideo: Keep the video file after post-processing
174 daterange: A DateRange object, download only if the upload_date is in the range.
175 skip_download: Skip the actual download of the video file
176 cachedir: Location of the cache files in the filesystem.
177 False to disable filesystem cache.
178 noplaylist: Download single video instead of a playlist if in doubt.
179 age_limit: An integer representing the user's age in years.
180 Unsuitable videos for the given age are skipped.
181 min_views: An integer representing the minimum view count the video
182 must have in order to not be skipped.
183 Videos without view count information are always
184 downloaded. None for no limit.
185 max_views: An integer representing the maximum view count.
186 Videos that are more popular than that are not
187 downloaded.
188 Videos without view count information are always
189 downloaded. None for no limit.
190 download_archive: File name of a file where all downloads are recorded.
191 Videos already present in the file are not downloaded
192 again.
193 cookiefile: File name where cookies should be read from and dumped to.
194 nocheckcertificate:Do not verify SSL certificates
195 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
196 At the moment, this is only supported by YouTube.
197 proxy: URL of the proxy server to use
198 cn_verification_proxy: URL of the proxy to use for IP address verification
199 on Chinese sites. (Experimental)
200 socket_timeout: Time to wait for unresponsive hosts, in seconds
201 bidi_workaround: Work around buggy terminals without bidirectional text
202 support, using fridibi
203 debug_printtraffic:Print out sent and received HTTP traffic
204 include_ads: Download ads as well
205 default_search: Prepend this string if an input url is not valid.
206 'auto' for elaborate guessing
207 encoding: Use this encoding instead of the system-specified.
208 extract_flat: Do not resolve URLs, return the immediate result.
209 Pass in 'in_playlist' to only show this behavior for
210 playlist items.
211 postprocessors: A list of dictionaries, each with an entry
212 * key: The name of the postprocessor. See
213 youtube_dl/postprocessor/__init__.py for a list.
214 as well as any further keyword arguments for the
215 postprocessor.
216 progress_hooks: A list of functions that get called on download
217 progress, with a dictionary with the entries
218 * status: One of "downloading", "error", or "finished".
219 Check this first and ignore unknown values.
220
221 If status is one of "downloading", or "finished", the
222 following properties may also be present:
223 * filename: The final filename (always present)
224 * tmpfilename: The filename we're currently writing to
225 * downloaded_bytes: Bytes on disk
226 * total_bytes: Size of the whole file, None if unknown
227 * total_bytes_estimate: Guess of the eventual file size,
228 None if unavailable.
229 * elapsed: The number of seconds since download started.
230 * eta: The estimated time in seconds, None if unknown
231 * speed: The download speed in bytes/second, None if
232 unknown
233 * fragment_index: The counter of the currently
234 downloaded video fragment.
235 * fragment_count: The number of fragments (= individual
236 files that will be merged)
237
238 Progress hooks are guaranteed to be called at least once
239 (with status "finished") if the download is successful.
240 merge_output_format: Extension to use when merging formats.
241 fixup: Automatically correct known faults of the file.
242 One of:
243 - "never": do nothing
244 - "warn": only emit a warning
245 - "detect_or_warn": check whether we can do anything
246 about it, warn otherwise (default)
247 source_address: (Experimental) Client-side IP address to bind to.
248 call_home: Boolean, true iff we are allowed to contact the
249 youtube-dl servers for debugging.
250 sleep_interval: Number of seconds to sleep before each download.
251 listformats: Print an overview of available video formats and exit.
252 list_thumbnails: Print a table of all thumbnails and exit.
253 match_filter: A function that gets called with the info_dict of
254 every video.
255 If it returns a message, the video is ignored.
256 If it returns None, the video is downloaded.
257 match_filter_func in utils.py is one example for this.
258 no_color: Do not emit color codes in output.
259
260 The following options determine which downloader is picked:
261 external_downloader: Executable of the external downloader to call.
262 None or unset for standard (built-in) downloader.
263 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
264
265 The following parameters are not used by YoutubeDL itself, they are used by
266 the downloader (see youtube_dl/downloader/common.py):
267 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
268 noresizebuffer, retries, continuedl, noprogress, consoletitle,
269 xattr_set_filesize, external_downloader_args, hls_use_mpegts.
270
271 The following options are used by the post processors:
272 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
273 otherwise prefer avconv.
274 postprocessor_args: A list of additional command-line arguments for the
275 postprocessor.
276 """
277
278 params = None
279 _ies = []
280 _pps = []
281 _download_retcode = None
282 _num_downloads = None
283 _screen_file = None
284
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params: dict of options (see the class docstring for the full list);
            they are merged over the built-in defaults into self.params.
        auto_init: when True, print the debug header and register all
            default info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Boolean index: stderr when logtostderr is set, stdout otherwise.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            try:
                import pty
                # A pty pair is used so the bidi filter's output can be read back.
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv is not available; fall back to fribidi.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == 2:
                    # errno 2 == ENOENT: neither executable was found.
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register the configured postprocessors; 'key' names
        # the class, the remaining entries are passed as keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
361
362 def warn_if_short_id(self, argv):
363 # short YouTube ID starting with dash?
364 idxs = [
365 i for i, a in enumerate(argv)
366 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
367 if idxs:
368 correct_argv = (
369 ['youtube-dl'] +
370 [a for i, a in enumerate(argv) if i not in idxs] +
371 ['--'] + [argv[i] for i in idxs]
372 )
373 self.report_warning(
374 'Long argument string detected. '
375 'Use -- to separate parameters and URLs, like this:\n%s\n' %
376 args_to_str(correct_argv))
377
378 def add_info_extractor(self, ie):
379 """Add an InfoExtractor object to the end of the list."""
380 self._ies.append(ie)
381 if not isinstance(ie, type):
382 self._ies_instances[ie.ie_key()] = ie
383 ie.set_downloader(self)
384
385 def get_info_extractor(self, ie_key):
386 """
387 Get an instance of an IE with name ie_key, it will try to get one from
388 the _ies list, if there's no instance it will create a new one and add
389 it to the extractor list.
390 """
391 ie = self._ies_instances.get(ie_key)
392 if ie is None:
393 ie = get_info_extractor(ie_key)()
394 self.add_info_extractor(ie)
395 return ie
396
397 def add_default_info_extractors(self):
398 """
399 Add the InfoExtractors returned by gen_extractors to the end of the list
400 """
401 for ie in gen_extractor_classes():
402 self.add_info_extractor(ie)
403
404 def add_post_processor(self, pp):
405 """Add a PostProcessor object to the end of the chain."""
406 self._pps.append(pp)
407 pp.set_downloader(self)
408
409 def add_progress_hook(self, ph):
410 """Add the progress hook (currently only for the file downloader)"""
411 self._progress_hooks.append(ph)
412
    def _bidi_workaround(self, message):
        # If no bidi subprocess was set up in __init__ (option off, or the
        # executables were missing), pass the message through unchanged.
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        # Feed the text (with a trailing newline) to bidiv/fribidi...
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        # ...then read back exactly as many reordered lines as were written.
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        # Strip the newline that was appended above.
        return res[:-len('\n')]
425
    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode."""
        # Thin wrapper around to_stdout with quiet-checking enabled.
        return self.to_stdout(message, skip_eol, check_quiet=True)
429
430 def _write_string(self, s, out=None):
431 write_string(s, out=out, encoding=self.params.get('encoding'))
432
433 def to_stdout(self, message, skip_eol=False, check_quiet=False):
434 """Print message to stdout if not in quiet mode."""
435 if self.params.get('logger'):
436 self.params['logger'].debug(message)
437 elif not check_quiet or not self.params.get('quiet', False):
438 message = self._bidi_workaround(message)
439 terminator = ['\n', ''][skip_eol]
440 output = message + terminator
441
442 self._write_string(output, self._screen_file)
443
444 def to_stderr(self, message):
445 """Print message to stderr."""
446 assert isinstance(message, compat_str)
447 if self.params.get('logger'):
448 self.params['logger'].error(message)
449 else:
450 message = self._bidi_workaround(message)
451 output = message + '\n'
452 self._write_string(output, self._err_file)
453
    def to_console_title(self, message):
        # No-op unless the 'consoletitle' option is enabled.
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm-style OSC 0 escape: set icon name and window title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
463
464 def save_console_title(self):
465 if not self.params.get('consoletitle', False):
466 return
467 if 'TERM' in os.environ:
468 # Save the title on stack
469 self._write_string('\033[22;0t', self._screen_file)
470
471 def restore_console_title(self):
472 if not self.params.get('consoletitle', False):
473 return
474 if 'TERM' in os.environ:
475 # Restore the title from stack
476 self._write_string('\033[23;0t', self._screen_file)
477
478 def __enter__(self):
479 self.save_console_title()
480 return self
481
482 def __exit__(self, *args):
483 self.restore_console_title()
484
485 if self.params.get('cookiefile') is not None:
486 self.cookiejar.save()
487
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Wrapped extractor errors carry the original exc_info;
                    # include that traceback before the current one.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, preferring the wrapped original
            # exc_info when the active exception provides one.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
517
518 def report_warning(self, message):
519 '''
520 Print the message to stderr, it will be prefixed with 'WARNING:'
521 If stderr is a tty file the 'WARNING:' will be colored
522 '''
523 if self.params.get('logger') is not None:
524 self.params['logger'].warning(message)
525 else:
526 if self.params.get('no_warnings'):
527 return
528 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
529 _msg_header = '\033[0;33mWARNING:\033[0m'
530 else:
531 _msg_header = 'WARNING:'
532 warning_message = '%s %s' % (_msg_header, message)
533 self.to_stderr(warning_message)
534
535 def report_error(self, message, tb=None):
536 '''
537 Do the same as trouble, but prefixes the message with 'ERROR:', colored
538 in red if stderr is a tty file.
539 '''
540 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
541 _msg_header = '\033[0;31mERROR:\033[0m'
542 else:
543 _msg_header = 'ERROR:'
544 error_message = '%s %s' % (_msg_header, message)
545 self.trouble(error_message, tb)
546
547 def report_file_already_downloaded(self, file_name):
548 """Report file has already been fully downloaded."""
549 try:
550 self.to_screen('[download] %s has already been downloaded' % file_name)
551 except UnicodeEncodeError:
552 self.to_screen('[download] The file has already been downloaded')
553
    def prepare_filename(self, info_dict):
        """Generate the output filename from the outtmpl template.

        Returns the sanitized path, or None on a template error (after
        reporting it).
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            # Zero-pad playlist_index to the width of the playlist length.
            if template_dict.get('playlist_index') is not None:
                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            # Derive a 'resolution' field when the extractor did not set one.
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            # Sanitize every value for use in a filename; 'id' fields get the
            # less aggressive is_id treatment.
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id'))
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None)
            # Missing template fields render as 'NA' instead of raising.
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
            tmpl = compat_expanduser(outtmpl)
            filename = tmpl % template_dict
            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
596
    def _match_entry(self, info_dict, incomplete):
        """ Returns None iff the file should be downloaded """
        # Otherwise returns a human-readable reason string for skipping.
        # Filters are applied in order: title match/reject, date range,
        # view counts, age limit, download archive, then the user's
        # match_filter callback (skipped for incomplete/playlist entries).

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            # Default DateRange() accepts any date.
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            # Videos without view_count information skip both bounds checks.
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title

        if not incomplete:
            # Only run the user callback on fully-extracted info dicts.
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                ret = match_filter(info_dict)
                if ret is not None:
                    return ret

        return None
638
639 @staticmethod
640 def add_extra_info(info_dict, extra_info):
641 '''Set the keys from extra_info in info dict if they are missing'''
642 for key, value in extra_info.items():
643 info_dict.setdefault(key, value)
644
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''
        # ie_key pins a specific extractor; process=False returns the raw
        # ie_result without resolving nested references.

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            # _ies may hold classes; fetch (or lazily create) the instance.
            ie = self.get_info_extractor(ie.ie_key())
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
                break
            except MaxDownloadsReached:
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # for/else: no extractor claimed the URL.
            self.report_error('no suitable InfoExtractor for URL %s' % url)
698
699 def add_default_extra_info(self, ie_result, ie, url):
700 self.add_extra_info(ie_result, {
701 'extractor': ie.IE_NAME,
702 'webpage_url': url,
703 'webpage_url_basename': url_basename(url),
704 'extractor_key': ie.ie_key(),
705 })
706
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            # With flat extraction, url results (at least those nested in a
            # playlist) are returned as-is instead of being resolved.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None values from the transparent result override the
            # embedded page's, except for the routing keys removed below.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title') or ie_result.get('id')
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is 1-based in params; convert to 0-based slice index.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend')
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items')
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    # Expand a "1-3,7" style spec into individual 1-based indices.
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            # Entries may be a concrete list, a PagedList, or a plain iterable;
            # each source is sliced/selected differently.
            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = [
                        ie_entries[i - 1] for i in playlistitems
                        if -n_all_entries <= i - 1 < n_all_entries]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # incomplete=True: entries may lack full metadata at this point.
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Backfill bookkeeping fields on each legacy entry in place.
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
870
871 def _build_format_filter(self, filter_spec):
872 " Returns a function to filter the formats according to the filter_spec "
873
874 OPERATORS = {
875 '<': operator.lt,
876 '<=': operator.le,
877 '>': operator.gt,
878 '>=': operator.ge,
879 '=': operator.eq,
880 '!=': operator.ne,
881 }
882 operator_rex = re.compile(r'''(?x)\s*
883 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
884 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
885 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
886 $
887 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
888 m = operator_rex.search(filter_spec)
889 if m:
890 try:
891 comparison_value = int(m.group('value'))
892 except ValueError:
893 comparison_value = parse_filesize(m.group('value'))
894 if comparison_value is None:
895 comparison_value = parse_filesize(m.group('value') + 'B')
896 if comparison_value is None:
897 raise ValueError(
898 'Invalid value %r in format specification %r' % (
899 m.group('value'), filter_spec))
900 op = OPERATORS[m.group('op')]
901
902 if not m:
903 STR_OPERATORS = {
904 '=': operator.eq,
905 '!=': operator.ne,
906 '^=': lambda attr, value: attr.startswith(value),
907 '$=': lambda attr, value: attr.endswith(value),
908 '*=': lambda attr, value: value in attr,
909 }
910 str_operator_rex = re.compile(r'''(?x)
911 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
912 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
913 \s*(?P<value>[a-zA-Z0-9._-]+)
914 \s*$
915 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
916 m = str_operator_rex.search(filter_spec)
917 if m:
918 comparison_value = m.group('value')
919 op = STR_OPERATORS[m.group('op')]
920
921 if not m:
922 raise ValueError('Invalid filter specification %r' % filter_spec)
923
924 def _filter(f):
925 actual_value = f.get(m.group('key'))
926 if actual_value is None:
927 return m.group('none_inclusive')
928 return op(actual_value, comparison_value)
929 return _filter
930
    def build_format_selector(self, format_spec):
        """Compile *format_spec* (e.g. 'bestvideo+bestaudio/best' or
        'mp4[height<=480]') into a function that, given an iterable of
        format dicts, yields the selected format dict(s).

        The spec is tokenized with Python's own tokenizer, cleaned up by
        _remove_unused_ops, parsed into a tree of FormatSelector nodes
        (SINGLE, GROUP, PICKFIRST, MERGE) and finally compiled into
        nested selector functions by _build_selector_function.
        """
        def syntax_error(note, start):
            # Build (not raise) a SyntaxError pointing a caret at the
            # offending column of the original spec.
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node kinds of the parse tree.
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        # type: one of the four kinds above; selector: payload (string,
        # child node(s) or list); filters: filter spec strings from [...].
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Consume tokens up to the closing ']' and return the raw
            # filter string in between (parsed later by _build_format_filter).
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    # Flush any pending joined name before the bracket.
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Glue adjacent names/numbers/other ops into one NAME token.
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser over the token stream; the inside_*
            # flags tell it which closing tokens end the current context.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A bare filter ('[height<=480]') implicitly filters 'best'.
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Compile a parse-tree node (or list of top-level nodes) into a
            # function mapping a format list to the selected format(s).
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(formats):
                    # Concatenate the output of every top-level selector.
                    for f in fs:
                        for format in f(formats):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(formats):
                    # First alternative that yields anything wins.
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(formats):
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # Formats are assumed sorted worst-to-best, so -1 is best.
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                                all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # A known extension selects by ext, anything else by format_id.
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    # Synthesized merged format: video attributes from the
                    # first component, audio attributes from the second.
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(formats):
                    formats = list(formats)
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            # Apply the node's [...] filters before running its selector.
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(formats):
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Minimal iterator over the token list supporting the one-token
            # pushback (restore_last_token) the parser needs.
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__  # Python 2 iterator protocol

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1193
1194 def _calc_headers(self, info_dict):
1195 res = std_headers.copy()
1196
1197 add_headers = info_dict.get('http_headers')
1198 if add_headers:
1199 res.update(add_headers)
1200
1201 cookies = self._calc_cookies(info_dict)
1202 if cookies:
1203 res['Cookie'] = cookies
1204
1205 return res
1206
1207 def _calc_cookies(self, info_dict):
1208 pr = sanitized_Request(info_dict['url'])
1209 self.cookiejar.add_cookie_header(pr)
1210 return pr.get_header('Cookie')
1211
1212 def process_video_result(self, info_dict, download=True):
1213 assert info_dict.get('_type', 'video') == 'video'
1214
1215 if 'id' not in info_dict:
1216 raise ExtractorError('Missing "id" field in extractor result')
1217 if 'title' not in info_dict:
1218 raise ExtractorError('Missing "title" field in extractor result')
1219
1220 if 'playlist' not in info_dict:
1221 # It isn't part of a playlist
1222 info_dict['playlist'] = None
1223 info_dict['playlist_index'] = None
1224
1225 thumbnails = info_dict.get('thumbnails')
1226 if thumbnails is None:
1227 thumbnail = info_dict.get('thumbnail')
1228 if thumbnail:
1229 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1230 if thumbnails:
1231 thumbnails.sort(key=lambda t: (
1232 t.get('preference'), t.get('width'), t.get('height'),
1233 t.get('id'), t.get('url')))
1234 for i, t in enumerate(thumbnails):
1235 t['url'] = sanitize_url(t['url'])
1236 if t.get('width') and t.get('height'):
1237 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1238 if t.get('id') is None:
1239 t['id'] = '%d' % i
1240
1241 if self.params.get('list_thumbnails'):
1242 self.list_thumbnails(info_dict)
1243 return
1244
1245 thumbnail = info_dict.get('thumbnail')
1246 if thumbnail:
1247 info_dict['thumbnail'] = sanitize_url(thumbnail)
1248 elif thumbnails:
1249 info_dict['thumbnail'] = thumbnails[-1]['url']
1250
1251 if 'display_id' not in info_dict and 'id' in info_dict:
1252 info_dict['display_id'] = info_dict['id']
1253
1254 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1255 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1256 # see http://bugs.python.org/issue1646728)
1257 try:
1258 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1259 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1260 except (ValueError, OverflowError, OSError):
1261 pass
1262
1263 # Auto generate title fields corresponding to the *_number fields when missing
1264 # in order to always have clean titles. This is very common for TV series.
1265 for field in ('chapter', 'season', 'episode'):
1266 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1267 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1268
1269 subtitles = info_dict.get('subtitles')
1270 if subtitles:
1271 for _, subtitle in subtitles.items():
1272 for subtitle_format in subtitle:
1273 if subtitle_format.get('url'):
1274 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1275 if 'ext' not in subtitle_format:
1276 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1277
1278 if self.params.get('listsubtitles', False):
1279 if 'automatic_captions' in info_dict:
1280 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1281 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1282 return
1283 info_dict['requested_subtitles'] = self.process_subtitles(
1284 info_dict['id'], subtitles,
1285 info_dict.get('automatic_captions'))
1286
1287 # We now pick which formats have to be downloaded
1288 if info_dict.get('formats') is None:
1289 # There's only one format available
1290 formats = [info_dict]
1291 else:
1292 formats = info_dict['formats']
1293
1294 if not formats:
1295 raise ExtractorError('No video formats found!')
1296
1297 formats_dict = {}
1298
1299 # We check that all the formats have the format and format_id fields
1300 for i, format in enumerate(formats):
1301 if 'url' not in format:
1302 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1303
1304 format['url'] = sanitize_url(format['url'])
1305
1306 if format.get('format_id') is None:
1307 format['format_id'] = compat_str(i)
1308 else:
1309 # Sanitize format_id from characters used in format selector expression
1310 format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
1311 format_id = format['format_id']
1312 if format_id not in formats_dict:
1313 formats_dict[format_id] = []
1314 formats_dict[format_id].append(format)
1315
1316 # Make sure all formats have unique format_id
1317 for format_id, ambiguous_formats in formats_dict.items():
1318 if len(ambiguous_formats) > 1:
1319 for i, format in enumerate(ambiguous_formats):
1320 format['format_id'] = '%s-%d' % (format_id, i)
1321
1322 for i, format in enumerate(formats):
1323 if format.get('format') is None:
1324 format['format'] = '{id} - {res}{note}'.format(
1325 id=format['format_id'],
1326 res=self.format_resolution(format),
1327 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1328 )
1329 # Automatically determine file extension if missing
1330 if 'ext' not in format:
1331 format['ext'] = determine_ext(format['url']).lower()
1332 # Automatically determine protocol if missing (useful for format
1333 # selection purposes)
1334 if 'protocol' not in format:
1335 format['protocol'] = determine_protocol(format)
1336 # Add HTTP headers, so that external programs can use them from the
1337 # json output
1338 full_format_info = info_dict.copy()
1339 full_format_info.update(format)
1340 format['http_headers'] = self._calc_headers(full_format_info)
1341
1342 # TODO Central sorting goes here
1343
1344 if formats[0] is not info_dict:
1345 # only set the 'formats' fields if the original info_dict list them
1346 # otherwise we end up with a circular reference, the first (and unique)
1347 # element in the 'formats' field in info_dict is info_dict itself,
1348 # which can't be exported to json
1349 info_dict['formats'] = formats
1350 if self.params.get('listformats'):
1351 self.list_formats(info_dict)
1352 return
1353
1354 req_format = self.params.get('format')
1355 if req_format is None:
1356 req_format_list = []
1357 if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1358 not info_dict.get('is_live')):
1359 merger = FFmpegMergerPP(self)
1360 if merger.available and merger.can_merge():
1361 req_format_list.append('bestvideo+bestaudio')
1362 req_format_list.append('best')
1363 req_format = '/'.join(req_format_list)
1364 format_selector = self.build_format_selector(req_format)
1365 formats_to_download = list(format_selector(formats))
1366 if not formats_to_download:
1367 raise ExtractorError('requested format not available',
1368 expected=True)
1369
1370 if download:
1371 if len(formats_to_download) > 1:
1372 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1373 for format in formats_to_download:
1374 new_info = dict(info_dict)
1375 new_info.update(format)
1376 self.process_info(new_info)
1377 # We update the info dict with the best quality format (backwards compatibility)
1378 info_dict.update(formats_to_download[-1])
1379 return info_dict
1380
1381 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1382 """Select the requested subtitles and their format"""
1383 available_subs = {}
1384 if normal_subtitles and self.params.get('writesubtitles'):
1385 available_subs.update(normal_subtitles)
1386 if automatic_captions and self.params.get('writeautomaticsub'):
1387 for lang, cap_info in automatic_captions.items():
1388 if lang not in available_subs:
1389 available_subs[lang] = cap_info
1390
1391 if (not self.params.get('writesubtitles') and not
1392 self.params.get('writeautomaticsub') or not
1393 available_subs):
1394 return None
1395
1396 if self.params.get('allsubtitles', False):
1397 requested_langs = available_subs.keys()
1398 else:
1399 if self.params.get('subtitleslangs', False):
1400 requested_langs = self.params.get('subtitleslangs')
1401 elif 'en' in available_subs:
1402 requested_langs = ['en']
1403 else:
1404 requested_langs = [list(available_subs.keys())[0]]
1405
1406 formats_query = self.params.get('subtitlesformat', 'best')
1407 formats_preference = formats_query.split('/') if formats_query else []
1408 subs = {}
1409 for lang in requested_langs:
1410 formats = available_subs.get(lang)
1411 if formats is None:
1412 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1413 continue
1414 for ext in formats_preference:
1415 if ext == 'best':
1416 f = formats[-1]
1417 break
1418 matches = list(filter(lambda f: f['ext'] == ext, formats))
1419 if matches:
1420 f = matches[-1]
1421 break
1422 else:
1423 f = formats[-1]
1424 self.report_warning(
1425 'No subtitle format found matching "%s" for language %s, '
1426 'using %s' % (formats_query, lang, f['ext']))
1427 subs[lang] = f
1428 return subs
1429
1430 def process_info(self, info_dict):
1431 """Process a single resolved IE result."""
1432
1433 assert info_dict.get('_type', 'video') == 'video'
1434
1435 max_downloads = self.params.get('max_downloads')
1436 if max_downloads is not None:
1437 if self._num_downloads >= int(max_downloads):
1438 raise MaxDownloadsReached()
1439
1440 info_dict['fulltitle'] = info_dict['title']
1441 if len(info_dict['title']) > 200:
1442 info_dict['title'] = info_dict['title'][:197] + '...'
1443
1444 if 'format' not in info_dict:
1445 info_dict['format'] = info_dict['ext']
1446
1447 reason = self._match_entry(info_dict, incomplete=False)
1448 if reason is not None:
1449 self.to_screen('[download] ' + reason)
1450 return
1451
1452 self._num_downloads += 1
1453
1454 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1455
1456 # Forced printings
1457 if self.params.get('forcetitle', False):
1458 self.to_stdout(info_dict['fulltitle'])
1459 if self.params.get('forceid', False):
1460 self.to_stdout(info_dict['id'])
1461 if self.params.get('forceurl', False):
1462 if info_dict.get('requested_formats') is not None:
1463 for f in info_dict['requested_formats']:
1464 self.to_stdout(f['url'] + f.get('play_path', ''))
1465 else:
1466 # For RTMP URLs, also include the playpath
1467 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1468 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1469 self.to_stdout(info_dict['thumbnail'])
1470 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1471 self.to_stdout(info_dict['description'])
1472 if self.params.get('forcefilename', False) and filename is not None:
1473 self.to_stdout(filename)
1474 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1475 self.to_stdout(formatSeconds(info_dict['duration']))
1476 if self.params.get('forceformat', False):
1477 self.to_stdout(info_dict['format'])
1478 if self.params.get('forcejson', False):
1479 self.to_stdout(json.dumps(info_dict))
1480
1481 # Do nothing else if in simulate mode
1482 if self.params.get('simulate', False):
1483 return
1484
1485 if filename is None:
1486 return
1487
1488 try:
1489 dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1490 if dn and not os.path.exists(dn):
1491 os.makedirs(dn)
1492 except (OSError, IOError) as err:
1493 self.report_error('unable to create directory ' + error_to_compat_str(err))
1494 return
1495
1496 if self.params.get('writedescription', False):
1497 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1498 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1499 self.to_screen('[info] Video description is already present')
1500 elif info_dict.get('description') is None:
1501 self.report_warning('There\'s no description to write.')
1502 else:
1503 try:
1504 self.to_screen('[info] Writing video description to: ' + descfn)
1505 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1506 descfile.write(info_dict['description'])
1507 except (OSError, IOError):
1508 self.report_error('Cannot write description file ' + descfn)
1509 return
1510
1511 if self.params.get('writeannotations', False):
1512 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1513 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1514 self.to_screen('[info] Video annotations are already present')
1515 else:
1516 try:
1517 self.to_screen('[info] Writing video annotations to: ' + annofn)
1518 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1519 annofile.write(info_dict['annotations'])
1520 except (KeyError, TypeError):
1521 self.report_warning('There are no annotations to write.')
1522 except (OSError, IOError):
1523 self.report_error('Cannot write annotations file: ' + annofn)
1524 return
1525
1526 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1527 self.params.get('writeautomaticsub')])
1528
1529 if subtitles_are_requested and info_dict.get('requested_subtitles'):
1530 # subtitles download errors are already managed as troubles in relevant IE
1531 # that way it will silently go on when used with unsupporting IE
1532 subtitles = info_dict['requested_subtitles']
1533 ie = self.get_info_extractor(info_dict['extractor_key'])
1534 for sub_lang, sub_info in subtitles.items():
1535 sub_format = sub_info['ext']
1536 if sub_info.get('data') is not None:
1537 sub_data = sub_info['data']
1538 else:
1539 try:
1540 sub_data = ie._download_webpage(
1541 sub_info['url'], info_dict['id'], note=False)
1542 except ExtractorError as err:
1543 self.report_warning('Unable to download subtitle for "%s": %s' %
1544 (sub_lang, error_to_compat_str(err.cause)))
1545 continue
1546 try:
1547 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1548 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1549 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1550 else:
1551 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1552 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1553 subfile.write(sub_data)
1554 except (OSError, IOError):
1555 self.report_error('Cannot write subtitles file ' + sub_filename)
1556 return
1557
1558 if self.params.get('writeinfojson', False):
1559 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1560 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1561 self.to_screen('[info] Video description metadata is already present')
1562 else:
1563 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1564 try:
1565 write_json_file(self.filter_requested_info(info_dict), infofn)
1566 except (OSError, IOError):
1567 self.report_error('Cannot write metadata to JSON file ' + infofn)
1568 return
1569
1570 self._write_thumbnails(info_dict, filename)
1571
1572 if not self.params.get('skip_download', False):
1573 try:
1574 def dl(name, info):
1575 fd = get_suitable_downloader(info, self.params)(self, self.params)
1576 for ph in self._progress_hooks:
1577 fd.add_progress_hook(ph)
1578 if self.params.get('verbose'):
1579 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1580 return fd.download(name, info)
1581
1582 if info_dict.get('requested_formats') is not None:
1583 downloaded = []
1584 success = True
1585 merger = FFmpegMergerPP(self)
1586 if not merger.available:
1587 postprocessors = []
1588 self.report_warning('You have requested multiple '
1589 'formats but ffmpeg or avconv are not installed.'
1590 ' The formats won\'t be merged.')
1591 else:
1592 postprocessors = [merger]
1593
1594 def compatible_formats(formats):
1595 video, audio = formats
1596 # Check extension
1597 video_ext, audio_ext = audio.get('ext'), video.get('ext')
1598 if video_ext and audio_ext:
1599 COMPATIBLE_EXTS = (
1600 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1601 ('webm')
1602 )
1603 for exts in COMPATIBLE_EXTS:
1604 if video_ext in exts and audio_ext in exts:
1605 return True
1606 # TODO: Check acodec/vcodec
1607 return False
1608
1609 filename_real_ext = os.path.splitext(filename)[1][1:]
1610 filename_wo_ext = (
1611 os.path.splitext(filename)[0]
1612 if filename_real_ext == info_dict['ext']
1613 else filename)
1614 requested_formats = info_dict['requested_formats']
1615 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1616 info_dict['ext'] = 'mkv'
1617 self.report_warning(
1618 'Requested formats are incompatible for merge and will be merged into mkv.')
1619 # Ensure filename always has a correct extension for successful merge
1620 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1621 if os.path.exists(encodeFilename(filename)):
1622 self.to_screen(
1623 '[download] %s has already been downloaded and '
1624 'merged' % filename)
1625 else:
1626 for f in requested_formats:
1627 new_info = dict(info_dict)
1628 new_info.update(f)
1629 fname = self.prepare_filename(new_info)
1630 fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1631 downloaded.append(fname)
1632 partial_success = dl(fname, new_info)
1633 success = success and partial_success
1634 info_dict['__postprocessors'] = postprocessors
1635 info_dict['__files_to_merge'] = downloaded
1636 else:
1637 # Just a single file
1638 success = dl(filename, info_dict)
1639 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1640 self.report_error('unable to download video data: %s' % str(err))
1641 return
1642 except (OSError, IOError) as err:
1643 raise UnavailableVideoError(err)
1644 except (ContentTooShortError, ) as err:
1645 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1646 return
1647
1648 if success and filename != '-':
1649 # Fixup content
1650 fixup_policy = self.params.get('fixup')
1651 if fixup_policy is None:
1652 fixup_policy = 'detect_or_warn'
1653
1654 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1655
1656 stretched_ratio = info_dict.get('stretched_ratio')
1657 if stretched_ratio is not None and stretched_ratio != 1:
1658 if fixup_policy == 'warn':
1659 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1660 info_dict['id'], stretched_ratio))
1661 elif fixup_policy == 'detect_or_warn':
1662 stretched_pp = FFmpegFixupStretchedPP(self)
1663 if stretched_pp.available:
1664 info_dict.setdefault('__postprocessors', [])
1665 info_dict['__postprocessors'].append(stretched_pp)
1666 else:
1667 self.report_warning(
1668 '%s: Non-uniform pixel ratio (%s). %s'
1669 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
1670 else:
1671 assert fixup_policy in ('ignore', 'never')
1672
1673 if (info_dict.get('requested_formats') is None and
1674 info_dict.get('container') == 'm4a_dash'):
1675 if fixup_policy == 'warn':
1676 self.report_warning(
1677 '%s: writing DASH m4a. '
1678 'Only some players support this container.'
1679 % info_dict['id'])
1680 elif fixup_policy == 'detect_or_warn':
1681 fixup_pp = FFmpegFixupM4aPP(self)
1682 if fixup_pp.available:
1683 info_dict.setdefault('__postprocessors', [])
1684 info_dict['__postprocessors'].append(fixup_pp)
1685 else:
1686 self.report_warning(
1687 '%s: writing DASH m4a. '
1688 'Only some players support this container. %s'
1689 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1690 else:
1691 assert fixup_policy in ('ignore', 'never')
1692
1693 if (info_dict.get('protocol') == 'm3u8_native' or
1694 info_dict.get('protocol') == 'm3u8' and
1695 self.params.get('hls_prefer_native')):
1696 if fixup_policy == 'warn':
1697 self.report_warning('%s: malformated aac bitstream.' % (
1698 info_dict['id']))
1699 elif fixup_policy == 'detect_or_warn':
1700 fixup_pp = FFmpegFixupM3u8PP(self)
1701 if fixup_pp.available:
1702 info_dict.setdefault('__postprocessors', [])
1703 info_dict['__postprocessors'].append(fixup_pp)
1704 else:
1705 self.report_warning(
1706 '%s: malformated aac bitstream. %s'
1707 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1708 else:
1709 assert fixup_policy in ('ignore', 'never')
1710
1711 try:
1712 self.post_process(filename, info_dict)
1713 except (PostProcessingError) as err:
1714 self.report_error('postprocessing: %s' % str(err))
1715 return
1716 self.record_download_archive(info_dict)
1717
1718 def download(self, url_list):
1719 """Download a given list of URLs."""
1720 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1721 if (len(url_list) > 1 and
1722 '%' not in outtmpl and
1723 self.params.get('max_downloads') != 1):
1724 raise SameFileError(outtmpl)
1725
1726 for url in url_list:
1727 try:
1728 # It also downloads the videos
1729 res = self.extract_info(
1730 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1731 except UnavailableVideoError:
1732 self.report_error('unable to download video')
1733 except MaxDownloadsReached:
1734 self.to_screen('[info] Maximum number of downloaded files reached.')
1735 raise
1736 else:
1737 if self.params.get('dump_single_json', False):
1738 self.to_stdout(json.dumps(res))
1739
1740 return self._download_retcode
1741
1742 def download_with_info_file(self, info_filename):
1743 with contextlib.closing(fileinput.FileInput(
1744 [info_filename], mode='r',
1745 openhook=fileinput.hook_encoded('utf-8'))) as f:
1746 # FileInput doesn't have a read method, we can't call json.load
1747 info = self.filter_requested_info(json.loads('\n'.join(f)))
1748 try:
1749 self.process_ie_result(info, download=True)
1750 except DownloadError:
1751 webpage_url = info.get('webpage_url')
1752 if webpage_url is not None:
1753 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1754 return self.download([webpage_url])
1755 else:
1756 raise
1757 return self._download_retcode
1758
1759 @staticmethod
1760 def filter_requested_info(info_dict):
1761 return dict(
1762 (k, v) for k, v in info_dict.items()
1763 if k not in ['requested_formats', 'requested_subtitles'])
1764
1765 def post_process(self, filename, ie_info):
1766 """Run all the postprocessors on the given file."""
1767 info = dict(ie_info)
1768 info['filepath'] = filename
1769 pps_chain = []
1770 if ie_info.get('__postprocessors') is not None:
1771 pps_chain.extend(ie_info['__postprocessors'])
1772 pps_chain.extend(self._pps)
1773 for pp in pps_chain:
1774 files_to_delete = []
1775 try:
1776 files_to_delete, info = pp.run(info)
1777 except PostProcessingError as e:
1778 self.report_error(e.msg)
1779 if files_to_delete and not self.params.get('keepvideo', False):
1780 for old_filename in files_to_delete:
1781 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1782 try:
1783 os.remove(encodeFilename(old_filename))
1784 except (IOError, OSError):
1785 self.report_warning('Unable to remove downloaded original file')
1786
1787 def _make_archive_id(self, info_dict):
1788 # Future-proof against any change in case
1789 # and backwards compatibility with prior versions
1790 extractor = info_dict.get('extractor_key')
1791 if extractor is None:
1792 if 'id' in info_dict:
1793 extractor = info_dict.get('ie_key') # key in a playlist
1794 if extractor is None:
1795 return None # Incomplete video information
1796 return extractor.lower() + ' ' + info_dict['id']
1797
1798 def in_download_archive(self, info_dict):
1799 fn = self.params.get('download_archive')
1800 if fn is None:
1801 return False
1802
1803 vid_id = self._make_archive_id(info_dict)
1804 if vid_id is None:
1805 return False # Incomplete video information
1806
1807 try:
1808 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1809 for line in archive_file:
1810 if line.strip() == vid_id:
1811 return True
1812 except IOError as ioe:
1813 if ioe.errno != errno.ENOENT:
1814 raise
1815 return False
1816
1817 def record_download_archive(self, info_dict):
1818 fn = self.params.get('download_archive')
1819 if fn is None:
1820 return
1821 vid_id = self._make_archive_id(info_dict)
1822 assert vid_id
1823 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1824 archive_file.write(vid_id + '\n')
1825
1826 @staticmethod
1827 def format_resolution(format, default='unknown'):
1828 if format.get('vcodec') == 'none':
1829 return 'audio only'
1830 if format.get('resolution') is not None:
1831 return format['resolution']
1832 if format.get('height') is not None:
1833 if format.get('width') is not None:
1834 res = '%sx%s' % (format['width'], format['height'])
1835 else:
1836 res = '%sp' % format['height']
1837 elif format.get('width') is not None:
1838 res = '%dx?' % format['width']
1839 else:
1840 res = default
1841 return res
1842
    def _format_note(self, fdict):
        """Build a short human-readable note for a single format dict.

        The note is accreted piece by piece (codec, bitrates, fps, audio
        info, filesize, ...); most pieces prepend ', ' only when something
        has already been written, so the order of the checks is significant.
        """
        res = ''
        # f4f/f4m fragments cannot be downloaded directly; flag them
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            # total bitrate, right-aligned to 4 digits
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # '@' glues the codec name to the video bitrate appended below
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # no usable video codec name, but separate video/audio bitrates exist
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            # audio bitrate, attached to the codec name / 'audio' marker above
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            # audio sampling rate
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
1898
1899 def list_formats(self, info_dict):
1900 formats = info_dict.get('formats', [info_dict])
1901 table = [
1902 [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1903 for f in formats
1904 if f.get('preference') is None or f['preference'] >= -1000]
1905 if len(formats) > 1:
1906 table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1907
1908 header_line = ['format code', 'extension', 'resolution', 'note']
1909 self.to_screen(
1910 '[info] Available formats for %s:\n%s' %
1911 (info_dict['id'], render_table(header_line, table)))
1912
1913 def list_thumbnails(self, info_dict):
1914 thumbnails = info_dict.get('thumbnails')
1915 if not thumbnails:
1916 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
1917 return
1918
1919 self.to_screen(
1920 '[info] Thumbnails for %s:' % info_dict['id'])
1921 self.to_screen(render_table(
1922 ['ID', 'width', 'height', 'URL'],
1923 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1924
1925 def list_subtitles(self, video_id, subtitles, name='subtitles'):
1926 if not subtitles:
1927 self.to_screen('%s has no %s' % (video_id, name))
1928 return
1929 self.to_screen(
1930 'Available %s for %s:' % (name, video_id))
1931 self.to_screen(render_table(
1932 ['Language', 'formats'],
1933 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1934 for lang, formats in subtitles.items()]))
1935
    def urlopen(self, req):
        """ Start an HTTP download """
        # Plain URL strings are wrapped into a sanitized Request object first
        if isinstance(req, compat_basestring):
            req = sanitized_Request(req)
        return self._opener.open(req, timeout=self._socket_timeout)
1941
1942 def print_debug_header(self):
1943 if not self.params.get('verbose'):
1944 return
1945
1946 if type('') is not compat_str:
1947 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1948 self.report_warning(
1949 'Your Python is broken! Update to a newer and supported version')
1950
1951 stdout_encoding = getattr(
1952 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1953 encoding_str = (
1954 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1955 locale.getpreferredencoding(),
1956 sys.getfilesystemencoding(),
1957 stdout_encoding,
1958 self.get_encoding()))
1959 write_string(encoding_str, encoding=None)
1960
1961 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1962 if _LAZY_LOADER:
1963 self._write_string('[debug] Lazy loading extractors enabled' + '\n')
1964 try:
1965 sp = subprocess.Popen(
1966 ['git', 'rev-parse', '--short', 'HEAD'],
1967 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1968 cwd=os.path.dirname(os.path.abspath(__file__)))
1969 out, err = sp.communicate()
1970 out = out.decode().strip()
1971 if re.match('[0-9a-f]+', out):
1972 self._write_string('[debug] Git HEAD: ' + out + '\n')
1973 except Exception:
1974 try:
1975 sys.exc_clear()
1976 except Exception:
1977 pass
1978 self._write_string('[debug] Python version %s - %s\n' % (
1979 platform.python_version(), platform_name()))
1980
1981 exe_versions = FFmpegPostProcessor.get_versions(self)
1982 exe_versions['rtmpdump'] = rtmpdump_version()
1983 exe_str = ', '.join(
1984 '%s %s' % (exe, v)
1985 for exe, v in sorted(exe_versions.items())
1986 if v
1987 )
1988 if not exe_str:
1989 exe_str = 'none'
1990 self._write_string('[debug] exe versions: %s\n' % exe_str)
1991
1992 proxy_map = {}
1993 for handler in self._opener.handlers:
1994 if hasattr(handler, 'proxies'):
1995 proxy_map.update(handler.proxies)
1996 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1997
1998 if self.params.get('call_home', False):
1999 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2000 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2001 latest_version = self.urlopen(
2002 'https://yt-dl.org/latest/version').read().decode('utf-8')
2003 if version_tuple(latest_version) > version_tuple(__version__):
2004 self.report_warning(
2005 'You are using an outdated version (newest version: %s)! '
2006 'See https://yt-dl.org/update if you need help updating.' %
2007 latest_version)
2008
    def _setup_opener(self):
        """Build the urllib opener used for all network requests.

        Configures socket timeout, cookie jar, proxies and the custom
        HTTP(S)/data handlers, then stores the opener as self._opener
        and the jar as self.cookiejar.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 10 minutes
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory jar only; nothing is persisted
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Load existing cookies only if the file is readable
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty string disables proxying entirely
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment's proxy settings
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/rg3/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2060
2061 def encode(self, s):
2062 if isinstance(s, bytes):
2063 return s # Already encoded
2064
2065 try:
2066 return s.encode(self.get_encoding())
2067 except UnicodeEncodeError as err:
2068 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2069 raise
2070
2071 def get_encoding(self):
2072 encoding = self.params.get('encoding')
2073 if encoding is None:
2074 encoding = preferredencoding()
2075 return encoding
2076
2077 def _write_thumbnails(self, info_dict, filename):
2078 if self.params.get('writethumbnail', False):
2079 thumbnails = info_dict.get('thumbnails')
2080 if thumbnails:
2081 thumbnails = [thumbnails[-1]]
2082 elif self.params.get('write_all_thumbnails', False):
2083 thumbnails = info_dict.get('thumbnails')
2084 else:
2085 return
2086
2087 if not thumbnails:
2088 # No thumbnails present, so return immediately
2089 return
2090
2091 for t in thumbnails:
2092 thumb_ext = determine_ext(t['url'], 'jpg')
2093 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2094 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2095 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2096
2097 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2098 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2099 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2100 else:
2101 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2102 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2103 try:
2104 uf = self.urlopen(t['url'])
2105 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2106 shutil.copyfileobj(uf, thumbf)
2107 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2108 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2109 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2110 self.report_warning('Unable to download thumbnail "%s": %s' %
2111 (t['url'], error_to_compat_str(err)))