youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import io
  12 import itertools
  13 import json
  14 import locale
  15 import operator
  16 import os
  17 import platform
  18 import re
  19 import shutil
  20 import subprocess
  21 import socket
  22 import sys
  23 import time
  24 import traceback
  25
  26 if os.name == 'nt':
  27     import ctypes
  28
  29 from .compat import (
  30     compat_basestring,
  31     compat_cookiejar,
  32     compat_expanduser,
  33     compat_get_terminal_size,
  34     compat_http_client,
  35     compat_kwargs,
  36     compat_str,
  37     compat_urllib_error,
  38     compat_urllib_request,
  39 )
  40 from .utils import (
  41     escape_url,
  42     ContentTooShortError,
  43     date_from_str,
  44     DateRange,
  45     DEFAULT_OUTTMPL,
  46     determine_ext,
  47     DownloadError,
  48     encodeFilename,
  49     ExtractorError,
  50     format_bytes,
  51     formatSeconds,
  52     HEADRequest,
  53     locked_file,
  54     make_HTTPS_handler,
  55     MaxDownloadsReached,
  56     PagedList,
  57     parse_filesize,
  58     PerRequestProxyHandler,
  59     PostProcessingError,
  60     platform_name,
  61     preferredencoding,
  62     render_table,
  63     SameFileError,
  64     sanitize_filename,
  65     sanitize_path,
  66     std_headers,
  67     subtitles_filename,
  68     UnavailableVideoError,
  69     url_basename,
  70     version_tuple,
  71     write_json_file,
  72     write_string,
  73     YoutubeDLHandler,
  74     prepend_extension,
  75     replace_extension,
  76     args_to_str,
  77     age_restricted,
  78 )
  79 from .cache import Cache
  80 from .extractor import get_info_extractor, gen_extractors
  81 from .downloader import get_suitable_downloader
  82 from .downloader.rtmp import rtmpdump_version
  83 from .postprocessor import (
  84     FFmpegFixupM4aPP,
  85     FFmpegFixupStretchedPP,
  86     FFmpegMergerPP,
  87     FFmpegPostProcessor,
  88     get_postprocessor,
  89 )
  90 from .version import __version__
  91
  92
  93 class YoutubeDL(object):
  94     """YoutubeDL class.
  95
    YoutubeDL objects are the ones responsible for downloading the
  97     actual video file and writing it to disk if the user has requested
  98     it, among some other tasks. In most cases there should be one per
  99     program. As, given a video URL, the downloader doesn't know how to
 100     extract all the needed information, task that InfoExtractors do, it
 101     has to pass the URL to one of them.
 102
    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.
 110
 111     YoutubeDL objects accept a lot of parameters. In order not to saturate
 112     the object constructor with arguments, it receives a dictionary of
 113     options instead. These options are available through the params
 114     attribute for the InfoExtractors to use. The YoutubeDL also
 115     registers itself as the downloader in charge for the InfoExtractors
 116     that are added to it, so this is a "mutual registration".
 117
 118     Available options:
 119
 120     username:          Username for authentication purposes.
 121     password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
 123     usenetrc:          Use netrc for authentication instead.
 124     verbose:           Print additional info to stdout.
 125     quiet:             Do not print messages to stdout.
 126     no_warnings:       Do not print out anything for warnings.
 127     forceurl:          Force printing final URL.
 128     forcetitle:        Force printing title.
 129     forceid:           Force printing ID.
 130     forcethumbnail:    Force printing thumbnail URL.
 131     forcedescription:  Force printing description.
 132     forcefilename:     Force printing final filename.
 133     forceduration:     Force printing duration.
 134     forcejson:         Force printing info_dict as JSON.
 135     dump_single_json:  Force printing the info_dict of the whole playlist
 136                        (or video) as a single JSON line.
 137     simulate:          Do not download the video files.
 138     format:            Video format code. See options.py for more information.
 139     outtmpl:           Template for output names.
 140     restrictfilenames: Do not allow "&" and spaces in file names
 141     ignoreerrors:      Do not stop on download errors.
 142     nooverwrites:      Prevent overwriting files.
 143     playliststart:     Playlist item to start at.
 144     playlistend:       Playlist item to end at.
 145     playlist_items:    Specific indices of playlist to download.
 146     playlistreverse:   Download playlist items in reverse order.
 147     matchtitle:        Download only matching titles.
 148     rejecttitle:       Reject downloads for matching titles.
 149     logger:            Log messages to a logging.Logger instance.
 150     logtostderr:       Log messages to stderr instead of stdout.
 151     writedescription:  Write the video description to a .description file
 152     writeinfojson:     Write the video description to a .info.json file
 153     writeannotations:  Write the video annotations to a .annotations.xml file
 154     writethumbnail:    Write the thumbnail image to a file
 155     write_all_thumbnails:  Write all thumbnail formats to files
 156     writesubtitles:    Write the video subtitles to a file
 157     writeautomaticsub: Write the automatic subtitles to a file
 158     allsubtitles:      Downloads all the subtitles of the video
 159                        (requires writesubtitles or writeautomaticsub)
 160     listsubtitles:     Lists all available subtitles for the video
 161     subtitlesformat:   The format code for subtitles
 162     subtitleslangs:    List of languages of the subtitles to download
 163     keepvideo:         Keep the video file after post-processing
 164     daterange:         A DateRange object, download only if the upload_date is in the range.
 165     skip_download:     Skip the actual download of the video file
 166     cachedir:          Location of the cache files in the filesystem.
 167                        False to disable filesystem cache.
 168     noplaylist:        Download single video instead of a playlist if in doubt.
 169     age_limit:         An integer representing the user's age in years.
 170                        Unsuitable videos for the given age are skipped.
 171     min_views:         An integer representing the minimum view count the video
 172                        must have in order to not be skipped.
 173                        Videos without view count information are always
 174                        downloaded. None for no limit.
 175     max_views:         An integer representing the maximum view count.
 176                        Videos that are more popular than that are not
 177                        downloaded.
 178                        Videos without view count information are always
 179                        downloaded. None for no limit.
 180     download_archive:  File name of a file where all downloads are recorded.
 181                        Videos already present in the file are not downloaded
 182                        again.
 183     cookiefile:        File name where cookies should be read from and dumped to.
 184     nocheckcertificate:Do not verify SSL certificates
 185     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 186                        At the moment, this is only supported by YouTube.
 187     proxy:             URL of the proxy server to use
 188     cn_verification_proxy:  URL of the proxy to use for IP address verification
 189                        on Chinese sites. (Experimental)
 190     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 191     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
 193     debug_printtraffic:Print out sent and received HTTP traffic
 194     include_ads:       Download ads as well
 195     default_search:    Prepend this string if an input url is not valid.
 196                        'auto' for elaborate guessing
 197     encoding:          Use this encoding instead of the system-specified.
 198     extract_flat:      Do not resolve URLs, return the immediate result.
 199                        Pass in 'in_playlist' to only show this behavior for
 200                        playlist items.
 201     postprocessors:    A list of dictionaries, each with an entry
 202                        * key:  The name of the postprocessor. See
 203                                youtube_dl/postprocessor/__init__.py for a list.
 204                        as well as any further keyword arguments for the
 205                        postprocessor.
 206     progress_hooks:    A list of functions that get called on download
 207                        progress, with a dictionary with the entries
 208                        * status: One of "downloading", "error", or "finished".
 209                                  Check this first and ignore unknown values.
 210
 211                        If status is one of "downloading", or "finished", the
 212                        following properties may also be present:
 213                        * filename: The final filename (always present)
 214                        * tmpfilename: The filename we're currently writing to
 215                        * downloaded_bytes: Bytes on disk
 216                        * total_bytes: Size of the whole file, None if unknown
 217                        * total_bytes_estimate: Guess of the eventual file size,
 218                                                None if unavailable.
 219                        * elapsed: The number of seconds since download started.
 220                        * eta: The estimated time in seconds, None if unknown
 221                        * speed: The download speed in bytes/second, None if
 222                                 unknown
 223                        * fragment_index: The counter of the currently
 224                                          downloaded video fragment.
 225                        * fragment_count: The number of fragments (= individual
 226                                          files that will be merged)
 227
 228                        Progress hooks are guaranteed to be called at least once
 229                        (with status "finished") if the download is successful.
 230     merge_output_format: Extension to use when merging formats.
 231     fixup:             Automatically correct known faults of the file.
 232                        One of:
 233                        - "never": do nothing
 234                        - "warn": only emit a warning
 235                        - "detect_or_warn": check whether we can do anything
 236                                            about it, warn otherwise (default)
 237     source_address:    (Experimental) Client-side IP address to bind to.
 238     call_home:         Boolean, true iff we are allowed to contact the
 239                        youtube-dl servers for debugging.
 240     sleep_interval:    Number of seconds to sleep before each download.
 241     listformats:       Print an overview of available video formats and exit.
 242     list_thumbnails:   Print a table of all thumbnails and exit.
 243     match_filter:      A function that gets called with the info_dict of
 244                        every video.
 245                        If it returns a message, the video is ignored.
 246                        If it returns None, the video is downloaded.
 247                        match_filter_func in utils.py is one example for this.
 248     no_color:          Do not emit color codes in output.
 249
 250     The following options determine which downloader is picked:
 251     external_downloader: Executable of the external downloader to call.
 252                        None or unset for standard (built-in) downloader.
 253     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
 254
 255     The following parameters are not used by YoutubeDL itself, they are used by
 256     the downloader (see youtube_dl/downloader/common.py):
 257     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 258     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 259     xattr_set_filesize, external_downloader_args.
 260
 261     The following options are used by the post processors:
 262     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 263                        otherwise prefer avconv.
 264     pp_params:         Extra parameters for external apps, like avconv.
 265     """
 266
    # Class-level placeholders. __init__ rebinds every one of these on the
    # instance, so the values below are only seen on the class itself.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 273
 274     def __init__(self, params=None, auto_init=True):
 275         """Create a FileDownloader object with the given options."""
 276         if params is None:
 277             params = {}
 278         self._ies = []
 279         self._ies_instances = {}
 280         self._pps = []
 281         self._progress_hooks = []
 282         self._download_retcode = 0
 283         self._num_downloads = 0
 284         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 285         self._err_file = sys.stderr
 286         self.params = params
 287         self.cache = Cache(self)
 288
 289         if params.get('bidi_workaround', False):
 290             try:
 291                 import pty
 292                 master, slave = pty.openpty()
 293                 width = compat_get_terminal_size().columns
 294                 if width is None:
 295                     width_args = []
 296                 else:
 297                     width_args = ['-w', str(width)]
 298                 sp_kwargs = dict(
 299                     stdin=subprocess.PIPE,
 300                     stdout=slave,
 301                     stderr=self._err_file)
 302                 try:
 303                     self._output_process = subprocess.Popen(
 304                         ['bidiv'] + width_args, **sp_kwargs
 305                     )
 306                 except OSError:
 307                     self._output_process = subprocess.Popen(
 308                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 309                 self._output_channel = os.fdopen(master, 'rb')
 310             except OSError as ose:
 311                 if ose.errno == 2:
 312                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 313                 else:
 314                     raise
 315
 316         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 317                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
 318                 not params.get('restrictfilenames', False)):
 319             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 320             self.report_warning(
 321                 'Assuming --restrict-filenames since file system encoding '
 322                 'cannot encode all characters. '
 323                 'Set the LC_ALL environment variable to fix this.')
 324             self.params['restrictfilenames'] = True
 325
 326         if isinstance(params.get('outtmpl'), bytes):
 327             self.report_warning(
 328                 'Parameter outtmpl is bytes, but should be a unicode string. '
 329                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 330
 331         self._setup_opener()
 332
 333         if auto_init:
 334             self.print_debug_header()
 335             self.add_default_info_extractors()
 336
 337         for pp_def_raw in self.params.get('postprocessors', []):
 338             pp_class = get_postprocessor(pp_def_raw['key'])
 339             pp_def = dict(pp_def_raw)
 340             del pp_def['key']
 341             pp = pp_class(self, **compat_kwargs(pp_def))
 342             self.add_post_processor(pp)
 343
 344         for ph in self.params.get('progress_hooks', []):
 345             self.add_progress_hook(ph)
 346
 347     def warn_if_short_id(self, argv):
 348         # short YouTube ID starting with dash?
 349         idxs = [
 350             i for i, a in enumerate(argv)
 351             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 352         if idxs:
 353             correct_argv = (
 354                 ['youtube-dl'] +
 355                 [a for i, a in enumerate(argv) if i not in idxs] +
 356                 ['--'] + [argv[i] for i in idxs]
 357             )
 358             self.report_warning(
 359                 'Long argument string detected. '
 360                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 361                 args_to_str(correct_argv))
 362
 363     def add_info_extractor(self, ie):
 364         """Add an InfoExtractor object to the end of the list."""
 365         self._ies.append(ie)
 366         self._ies_instances[ie.ie_key()] = ie
 367         ie.set_downloader(self)
 368
 369     def get_info_extractor(self, ie_key):
 370         """
 371         Get an instance of an IE with name ie_key, it will try to get one from
 372         the _ies list, if there's no instance it will create a new one and add
 373         it to the extractor list.
 374         """
 375         ie = self._ies_instances.get(ie_key)
 376         if ie is None:
 377             ie = get_info_extractor(ie_key)()
 378             self.add_info_extractor(ie)
 379         return ie
 380
 381     def add_default_info_extractors(self):
 382         """
 383         Add the InfoExtractors returned by gen_extractors to the end of the list
 384         """
 385         for ie in gen_extractors():
 386             self.add_info_extractor(ie)
 387
 388     def add_post_processor(self, pp):
 389         """Add a PostProcessor object to the end of the chain."""
 390         self._pps.append(pp)
 391         pp.set_downloader(self)
 392
 393     def add_progress_hook(self, ph):
 394         """Add the progress hook (currently only for the file downloader)"""
 395         self._progress_hooks.append(ph)
 396
 397     def _bidi_workaround(self, message):
 398         if not hasattr(self, '_output_channel'):
 399             return message
 400
 401         assert hasattr(self, '_output_process')
 402         assert isinstance(message, compat_str)
 403         line_count = message.count('\n') + 1
 404         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 405         self._output_process.stdin.flush()
 406         res = ''.join(self._output_channel.readline().decode('utf-8')
 407                       for _ in range(line_count))
 408         return res[:-len('\n')]
 409
 410     def to_screen(self, message, skip_eol=False):
 411         """Print message to stdout if not in quiet mode."""
 412         return self.to_stdout(message, skip_eol, check_quiet=True)
 413
 414     def _write_string(self, s, out=None):
 415         write_string(s, out=out, encoding=self.params.get('encoding'))
 416
 417     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 418         """Print message to stdout if not in quiet mode."""
 419         if self.params.get('logger'):
 420             self.params['logger'].debug(message)
 421         elif not check_quiet or not self.params.get('quiet', False):
 422             message = self._bidi_workaround(message)
 423             terminator = ['\n', ''][skip_eol]
 424             output = message + terminator
 425
 426             self._write_string(output, self._screen_file)
 427
 428     def to_stderr(self, message):
 429         """Print message to stderr."""
 430         assert isinstance(message, compat_str)
 431         if self.params.get('logger'):
 432             self.params['logger'].error(message)
 433         else:
 434             message = self._bidi_workaround(message)
 435             output = message + '\n'
 436             self._write_string(output, self._err_file)
 437
 438     def to_console_title(self, message):
 439         if not self.params.get('consoletitle', False):
 440             return
 441         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 442             # c_wchar_p() might not be necessary if `message` is
 443             # already of type unicode()
 444             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 445         elif 'TERM' in os.environ:
 446             self._write_string('\033]0;%s\007' % message, self._screen_file)
 447
 448     def save_console_title(self):
 449         if not self.params.get('consoletitle', False):
 450             return
 451         if 'TERM' in os.environ:
 452             # Save the title on stack
 453             self._write_string('\033[22;0t', self._screen_file)
 454
 455     def restore_console_title(self):
 456         if not self.params.get('consoletitle', False):
 457             return
 458         if 'TERM' in os.environ:
 459             # Restore the title from stack
 460             self._write_string('\033[23;0t', self._screen_file)
 461
 462     def __enter__(self):
 463         self.save_console_title()
 464         return self
 465
 466     def __exit__(self, *args):
 467         self.restore_console_title()
 468
 469         if self.params.get('cookiefile') is not None:
 470             self.cookiejar.save()
 471
 472     def trouble(self, message=None, tb=None):
 473         """Determine action to take when a download problem appears.
 474
 475         Depending on if the downloader has been configured to ignore
 476         download errors or not, this method may throw an exception or
 477         not when errors are found, after printing the message.
 478
 479         tb, if given, is additional traceback information.
 480         """
 481         if message is not None:
 482             self.to_stderr(message)
 483         if self.params.get('verbose'):
 484             if tb is None:
 485                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 486                     tb = ''
 487                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 488                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 489                     tb += compat_str(traceback.format_exc())
 490                 else:
 491                     tb_data = traceback.format_list(traceback.extract_stack())
 492                     tb = ''.join(tb_data)
 493             self.to_stderr(tb)
 494         if not self.params.get('ignoreerrors', False):
 495             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 496                 exc_info = sys.exc_info()[1].exc_info
 497             else:
 498                 exc_info = sys.exc_info()
 499             raise DownloadError(message, exc_info)
 500         self._download_retcode = 1
 501
 502     def report_warning(self, message):
 503         '''
 504         Print the message to stderr, it will be prefixed with 'WARNING:'
 505         If stderr is a tty file the 'WARNING:' will be colored
 506         '''
 507         if self.params.get('logger') is not None:
 508             self.params['logger'].warning(message)
 509         else:
 510             if self.params.get('no_warnings'):
 511                 return
 512             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 513                 _msg_header = '\033[0;33mWARNING:\033[0m'
 514             else:
 515                 _msg_header = 'WARNING:'
 516             warning_message = '%s %s' % (_msg_header, message)
 517             self.to_stderr(warning_message)
 518
 519     def report_error(self, message, tb=None):
 520         '''
 521         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 522         in red if stderr is a tty file.
 523         '''
 524         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 525             _msg_header = '\033[0;31mERROR:\033[0m'
 526         else:
 527             _msg_header = 'ERROR:'
 528         error_message = '%s %s' % (_msg_header, message)
 529         self.trouble(error_message, tb)
 530
 531     def report_file_already_downloaded(self, file_name):
 532         """Report file has already been fully downloaded."""
 533         try:
 534             self.to_screen('[download] %s has already been downloaded' % file_name)
 535         except UnicodeEncodeError:
 536             self.to_screen('[download] The file has already been downloaded')
 537
 538     def prepare_filename(self, info_dict):
 539         """Generate the output filename."""
 540         try:
 541             template_dict = dict(info_dict)
 542
 543             template_dict['epoch'] = int(time.time())
 544             autonumber_size = self.params.get('autonumber_size')
 545             if autonumber_size is None:
 546                 autonumber_size = 5
 547             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 548             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 549             if template_dict.get('playlist_index') is not None:
 550                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 551             if template_dict.get('resolution') is None:
 552                 if template_dict.get('width') and template_dict.get('height'):
 553                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 554                 elif template_dict.get('height'):
 555                     template_dict['resolution'] = '%sp' % template_dict['height']
 556                 elif template_dict.get('width'):
 557                     template_dict['resolution'] = '?x%d' % template_dict['width']
 558
 559             sanitize = lambda k, v: sanitize_filename(
 560                 compat_str(v),
 561                 restricted=self.params.get('restrictfilenames'),
 562                 is_id=(k == 'id'))
 563             template_dict = dict((k, sanitize(k, v))
 564                                  for k, v in template_dict.items()
 565                                  if v is not None)
 566             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 567
 568             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
 569             tmpl = compat_expanduser(outtmpl)
 570             filename = tmpl % template_dict
 571             # Temporary fix for #4787
 572             # 'Treat' all problem characters by passing filename through preferredencoding
 573             # to workaround encoding issues with subprocess on python2 @ Windows
 574             if sys.version_info < (3, 0) and sys.platform == 'win32':
 575                 filename = encodeFilename(filename, True).decode(preferredencoding())
 576             return filename
 577         except ValueError as err:
 578             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 579             return None
 580
 581     def _match_entry(self, info_dict, incomplete):
 582         """ Returns None iff the file should be downloaded """
 583
 584         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 585         if 'title' in info_dict:
 586             # This can happen when we're just evaluating the playlist
 587             title = info_dict['title']
 588             matchtitle = self.params.get('matchtitle', False)
 589             if matchtitle:
 590                 if not re.search(matchtitle, title, re.IGNORECASE):
 591                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 592             rejecttitle = self.params.get('rejecttitle', False)
 593             if rejecttitle:
 594                 if re.search(rejecttitle, title, re.IGNORECASE):
 595                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 596         date = info_dict.get('upload_date', None)
 597         if date is not None:
 598             dateRange = self.params.get('daterange', DateRange())
 599             if date not in dateRange:
 600                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 601         view_count = info_dict.get('view_count', None)
 602         if view_count is not None:
 603             min_views = self.params.get('min_views')
 604             if min_views is not None and view_count < min_views:
 605                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 606             max_views = self.params.get('max_views')
 607             if max_views is not None and view_count > max_views:
 608                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 609         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 610             return 'Skipping "%s" because it is age restricted' % video_title
 611         if self.in_download_archive(info_dict):
 612             return '%s has already been recorded in archive' % video_title
 613
 614         if not incomplete:
 615             match_filter = self.params.get('match_filter')
 616             if match_filter is not None:
 617                 ret = match_filter(info_dict)
 618                 if ret is not None:
 619                     return ret
 620
 621         return None
 622
 623     @staticmethod
 624     def add_extra_info(info_dict, extra_info):
 625         '''Set the keys from extra_info in info dict if they are missing'''
 626         for key, value in extra_info.items():
 627             info_dict.setdefault(key, value)
 628
 629     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 630                      process=True):
 631         '''
 632         Returns a list with a dictionary for each video we find.
 633         If 'download', also downloads the videos.
 634         extra_info is a dict containing the extra values to add to each result
 635         '''
 636
 637         if ie_key:
 638             ies = [self.get_info_extractor(ie_key)]
 639         else:
 640             ies = self._ies
 641
 642         for ie in ies:
 643             if not ie.suitable(url):
 644                 continue
 645
 646             if not ie.working():
 647                 self.report_warning('The program functionality for this site has been marked as broken, '
 648                                     'and will probably not work.')
 649
 650             try:
 651                 ie_result = ie.extract(url)
 652                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 653                     break
 654                 if isinstance(ie_result, list):
 655                     # Backwards compatibility: old IE result format
 656                     ie_result = {
 657                         '_type': 'compat_list',
 658                         'entries': ie_result,
 659                     }
 660                 self.add_default_extra_info(ie_result, ie, url)
 661                 if process:
 662                     return self.process_ie_result(ie_result, download, extra_info)
 663                 else:
 664                     return ie_result
 665             except ExtractorError as de:  # An error we somewhat expected
 666                 self.report_error(compat_str(de), de.format_traceback())
 667                 break
 668             except MaxDownloadsReached:
 669                 raise
 670             except Exception as e:
 671                 if self.params.get('ignoreerrors', False):
 672                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 673                     break
 674                 else:
 675                     raise
 676         else:
 677             self.report_error('no suitable InfoExtractor for URL %s' % url)
 678
 679     def add_default_extra_info(self, ie_result, ie, url):
 680         self.add_extra_info(ie_result, {
 681             'extractor': ie.IE_NAME,
 682             'webpage_url': url,
 683             'webpage_url_basename': url_basename(url),
 684             'extractor_key': ie.ie_key(),
 685         })
 686
 687     def process_ie_result(self, ie_result, download=True, extra_info={}):
 688         """
 689         Take the result of the ie(may be modified) and resolve all unresolved
 690         references (URLs, playlist items).
 691
 692         It will also download the videos if 'download'.
 693         Returns the resolved ie_result.
 694         """
 695
 696         result_type = ie_result.get('_type', 'video')
 697
 698         if result_type in ('url', 'url_transparent'):
 699             extract_flat = self.params.get('extract_flat', False)
 700             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 701                     extract_flat is True):
 702                 if self.params.get('forcejson', False):
 703                     self.to_stdout(json.dumps(ie_result))
 704                 return ie_result
 705
 706         if result_type == 'video':
 707             self.add_extra_info(ie_result, extra_info)
 708             return self.process_video_result(ie_result, download=download)
 709         elif result_type == 'url':
 710             # We have to add extra_info to the results because it may be
 711             # contained in a playlist
 712             return self.extract_info(ie_result['url'],
 713                                      download,
 714                                      ie_key=ie_result.get('ie_key'),
 715                                      extra_info=extra_info)
 716         elif result_type == 'url_transparent':
 717             # Use the information from the embedding page
 718             info = self.extract_info(
 719                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 720                 extra_info=extra_info, download=False, process=False)
 721
 722             force_properties = dict(
 723                 (k, v) for k, v in ie_result.items() if v is not None)
 724             for f in ('_type', 'url'):
 725                 if f in force_properties:
 726                     del force_properties[f]
 727             new_result = info.copy()
 728             new_result.update(force_properties)
 729
 730             assert new_result.get('_type') != 'url_transparent'
 731
 732             return self.process_ie_result(
 733                 new_result, download=download, extra_info=extra_info)
 734         elif result_type == 'playlist' or result_type == 'multi_video':
 735             # We process each entry in the playlist
 736             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 737             self.to_screen('[download] Downloading playlist: %s' % playlist)
 738
 739             playlist_results = []
 740
 741             playliststart = self.params.get('playliststart', 1) - 1
 742             playlistend = self.params.get('playlistend', None)
 743             # For backwards compatibility, interpret -1 as whole list
 744             if playlistend == -1:
 745                 playlistend = None
 746
 747             playlistitems_str = self.params.get('playlist_items', None)
 748             playlistitems = None
 749             if playlistitems_str is not None:
 750                 def iter_playlistitems(format):
 751                     for string_segment in format.split(','):
 752                         if '-' in string_segment:
 753                             start, end = string_segment.split('-')
 754                             for item in range(int(start), int(end) + 1):
 755                                 yield int(item)
 756                         else:
 757                             yield int(string_segment)
 758                 playlistitems = iter_playlistitems(playlistitems_str)
 759
 760             ie_entries = ie_result['entries']
 761             if isinstance(ie_entries, list):
 762                 n_all_entries = len(ie_entries)
 763                 if playlistitems:
 764                     entries = [
 765                         ie_entries[i - 1] for i in playlistitems
 766                         if -n_all_entries <= i - 1 < n_all_entries]
 767                 else:
 768                     entries = ie_entries[playliststart:playlistend]
 769                 n_entries = len(entries)
 770                 self.to_screen(
 771                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 772                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 773             elif isinstance(ie_entries, PagedList):
 774                 if playlistitems:
 775                     entries = []
 776                     for item in playlistitems:
 777                         entries.extend(ie_entries.getslice(
 778                             item - 1, item
 779                         ))
 780                 else:
 781                     entries = ie_entries.getslice(
 782                         playliststart, playlistend)
 783                 n_entries = len(entries)
 784                 self.to_screen(
 785                     "[%s] playlist %s: Downloading %d videos" %
 786                     (ie_result['extractor'], playlist, n_entries))
 787             else:  # iterable
 788                 if playlistitems:
 789                     entry_list = list(ie_entries)
 790                     entries = [entry_list[i - 1] for i in playlistitems]
 791                 else:
 792                     entries = list(itertools.islice(
 793                         ie_entries, playliststart, playlistend))
 794                 n_entries = len(entries)
 795                 self.to_screen(
 796                     "[%s] playlist %s: Downloading %d videos" %
 797                     (ie_result['extractor'], playlist, n_entries))
 798
 799             if self.params.get('playlistreverse', False):
 800                 entries = entries[::-1]
 801
 802             for i, entry in enumerate(entries, 1):
 803                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 804                 extra = {
 805                     'n_entries': n_entries,
 806                     'playlist': playlist,
 807                     'playlist_id': ie_result.get('id'),
 808                     'playlist_title': ie_result.get('title'),
 809                     'playlist_index': i + playliststart,
 810                     'extractor': ie_result['extractor'],
 811                     'webpage_url': ie_result['webpage_url'],
 812                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 813                     'extractor_key': ie_result['extractor_key'],
 814                 }
 815
 816                 reason = self._match_entry(entry, incomplete=True)
 817                 if reason is not None:
 818                     self.to_screen('[download] ' + reason)
 819                     continue
 820
 821                 entry_result = self.process_ie_result(entry,
 822                                                       download=download,
 823                                                       extra_info=extra)
 824                 playlist_results.append(entry_result)
 825             ie_result['entries'] = playlist_results
 826             return ie_result
 827         elif result_type == 'compat_list':
 828             self.report_warning(
 829                 'Extractor %s returned a compat_list result. '
 830                 'It needs to be updated.' % ie_result.get('extractor'))
 831
 832             def _fixup(r):
 833                 self.add_extra_info(
 834                     r,
 835                     {
 836                         'extractor': ie_result['extractor'],
 837                         'webpage_url': ie_result['webpage_url'],
 838                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 839                         'extractor_key': ie_result['extractor_key'],
 840                     }
 841                 )
 842                 return r
 843             ie_result['entries'] = [
 844                 self.process_ie_result(_fixup(r), download, extra_info)
 845                 for r in ie_result['entries']
 846             ]
 847             return ie_result
 848         else:
 849             raise Exception('Invalid result type: %s' % result_type)
 850
 851     def _apply_format_filter(self, format_spec, available_formats):
 852         " Returns a tuple of the remaining format_spec and filtered formats "
 853
 854         OPERATORS = {
 855             '<': operator.lt,
 856             '<=': operator.le,
 857             '>': operator.gt,
 858             '>=': operator.ge,
 859             '=': operator.eq,
 860             '!=': operator.ne,
 861         }
 862         operator_rex = re.compile(r'''(?x)\s*\[
 863             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
 864             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 865             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 866             \]$
 867             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 868         m = operator_rex.search(format_spec)
 869         if m:
 870             try:
 871                 comparison_value = int(m.group('value'))
 872             except ValueError:
 873                 comparison_value = parse_filesize(m.group('value'))
 874                 if comparison_value is None:
 875                     comparison_value = parse_filesize(m.group('value') + 'B')
 876                 if comparison_value is None:
 877                     raise ValueError(
 878                         'Invalid value %r in format specification %r' % (
 879                             m.group('value'), format_spec))
 880             op = OPERATORS[m.group('op')]
 881
 882         if not m:
 883             STR_OPERATORS = {
 884                 '=': operator.eq,
 885                 '!=': operator.ne,
 886             }
 887             str_operator_rex = re.compile(r'''(?x)\s*\[
 888                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
 889                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
 890                 \s*(?P<value>[a-zA-Z0-9_-]+)
 891                 \s*\]$
 892                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
 893             m = str_operator_rex.search(format_spec)
 894             if m:
 895                 comparison_value = m.group('value')
 896                 op = STR_OPERATORS[m.group('op')]
 897
 898         if not m:
 899             raise ValueError('Invalid format specification %r' % format_spec)
 900
 901         def _filter(f):
 902             actual_value = f.get(m.group('key'))
 903             if actual_value is None:
 904                 return m.group('none_inclusive')
 905             return op(actual_value, comparison_value)
 906         new_formats = [f for f in available_formats if _filter(f)]
 907
 908         new_format_spec = format_spec[:-len(m.group(0))]
 909         if not new_format_spec:
 910             new_format_spec = 'best'
 911
 912         return (new_format_spec, new_formats)
 913
 914     def select_format(self, format_spec, available_formats):
 915         while format_spec.endswith(']'):
 916             format_spec, available_formats = self._apply_format_filter(
 917                 format_spec, available_formats)
 918         if not available_formats:
 919             return None
 920
 921         if format_spec in ['best', 'worst', None]:
 922             format_idx = 0 if format_spec == 'worst' else -1
 923             audiovideo_formats = [
 924                 f for f in available_formats
 925                 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
 926             if audiovideo_formats:
 927                 return audiovideo_formats[format_idx]
 928             # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
 929             elif (all(f.get('acodec') != 'none' for f in available_formats) or
 930                   all(f.get('vcodec') != 'none' for f in available_formats)):
 931                 return available_formats[format_idx]
 932         elif format_spec == 'bestaudio':
 933             audio_formats = [
 934                 f for f in available_formats
 935                 if f.get('vcodec') == 'none']
 936             if audio_formats:
 937                 return audio_formats[-1]
 938         elif format_spec == 'worstaudio':
 939             audio_formats = [
 940                 f for f in available_formats
 941                 if f.get('vcodec') == 'none']
 942             if audio_formats:
 943                 return audio_formats[0]
 944         elif format_spec == 'bestvideo':
 945             video_formats = [
 946                 f for f in available_formats
 947                 if f.get('acodec') == 'none']
 948             if video_formats:
 949                 return video_formats[-1]
 950         elif format_spec == 'worstvideo':
 951             video_formats = [
 952                 f for f in available_formats
 953                 if f.get('acodec') == 'none']
 954             if video_formats:
 955                 return video_formats[0]
 956         else:
 957             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 958             if format_spec in extensions:
 959                 filter_f = lambda f: f['ext'] == format_spec
 960             else:
 961                 filter_f = lambda f: f['format_id'] == format_spec
 962             matches = list(filter(filter_f, available_formats))
 963             if matches:
 964                 return matches[-1]
 965         return None
 966
 967     def _calc_headers(self, info_dict):
 968         res = std_headers.copy()
 969
 970         add_headers = info_dict.get('http_headers')
 971         if add_headers:
 972             res.update(add_headers)
 973
 974         cookies = self._calc_cookies(info_dict)
 975         if cookies:
 976             res['Cookie'] = cookies
 977
 978         return res
 979
 980     def _calc_cookies(self, info_dict):
 981         pr = compat_urllib_request.Request(info_dict['url'])
 982         self.cookiejar.add_cookie_header(pr)
 983         return pr.get_header('Cookie')
 984
 985     def process_video_result(self, info_dict, download=True):
 986         assert info_dict.get('_type', 'video') == 'video'
 987
 988         if 'id' not in info_dict:
 989             raise ExtractorError('Missing "id" field in extractor result')
 990         if 'title' not in info_dict:
 991             raise ExtractorError('Missing "title" field in extractor result')
 992
 993         if 'playlist' not in info_dict:
 994             # It isn't part of a playlist
 995             info_dict['playlist'] = None
 996             info_dict['playlist_index'] = None
 997
 998         thumbnails = info_dict.get('thumbnails')
 999         if thumbnails is None:
1000             thumbnail = info_dict.get('thumbnail')
1001             if thumbnail:
1002                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1003         if thumbnails:
1004             thumbnails.sort(key=lambda t: (
1005                 t.get('preference'), t.get('width'), t.get('height'),
1006                 t.get('id'), t.get('url')))
1007             for i, t in enumerate(thumbnails):
1008                 if 'width' in t and 'height' in t:
1009                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1010                 if t.get('id') is None:
1011                     t['id'] = '%d' % i
1012
1013         if thumbnails and 'thumbnail' not in info_dict:
1014             info_dict['thumbnail'] = thumbnails[-1]['url']
1015
1016         if 'display_id' not in info_dict and 'id' in info_dict:
1017             info_dict['display_id'] = info_dict['id']
1018
1019         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1020             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1021             # see http://bugs.python.org/issue1646728)
1022             try:
1023                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1024                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1025             except (ValueError, OverflowError, OSError):
1026                 pass
1027
1028         if self.params.get('listsubtitles', False):
1029             if 'automatic_captions' in info_dict:
1030                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1031             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1032             return
1033         info_dict['requested_subtitles'] = self.process_subtitles(
1034             info_dict['id'], info_dict.get('subtitles'),
1035             info_dict.get('automatic_captions'))
1036
1037         # This extractors handle format selection themselves
1038         if info_dict['extractor'] in ['Youku']:
1039             if download:
1040                 self.process_info(info_dict)
1041             return info_dict
1042
1043         # We now pick which formats have to be downloaded
1044         if info_dict.get('formats') is None:
1045             # There's only one format available
1046             formats = [info_dict]
1047         else:
1048             formats = info_dict['formats']
1049
1050         if not formats:
1051             raise ExtractorError('No video formats found!')
1052
1053         formats_dict = {}
1054
1055         # We check that all the formats have the format and format_id fields
1056         for i, format in enumerate(formats):
1057             if 'url' not in format:
1058                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1059
1060             if format.get('format_id') is None:
1061                 format['format_id'] = compat_str(i)
1062             format_id = format['format_id']
1063             if format_id not in formats_dict:
1064                 formats_dict[format_id] = []
1065             formats_dict[format_id].append(format)
1066
1067         # Make sure all formats have unique format_id
1068         for format_id, ambiguous_formats in formats_dict.items():
1069             if len(ambiguous_formats) > 1:
1070                 for i, format in enumerate(ambiguous_formats):
1071                     format['format_id'] = '%s-%d' % (format_id, i)
1072
1073         for i, format in enumerate(formats):
1074             if format.get('format') is None:
1075                 format['format'] = '{id} - {res}{note}'.format(
1076                     id=format['format_id'],
1077                     res=self.format_resolution(format),
1078                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1079                 )
1080             # Automatically determine file extension if missing
1081             if 'ext' not in format:
1082                 format['ext'] = determine_ext(format['url']).lower()
1083             # Add HTTP headers, so that external programs can use them from the
1084             # json output
1085             full_format_info = info_dict.copy()
1086             full_format_info.update(format)
1087             format['http_headers'] = self._calc_headers(full_format_info)
1088
1089         # TODO Central sorting goes here
1090
1091         if formats[0] is not info_dict:
1092             # only set the 'formats' fields if the original info_dict list them
1093             # otherwise we end up with a circular reference, the first (and unique)
1094             # element in the 'formats' field in info_dict is info_dict itself,
1095             # wich can't be exported to json
1096             info_dict['formats'] = formats
1097         if self.params.get('listformats'):
1098             self.list_formats(info_dict)
1099             return
1100         if self.params.get('list_thumbnails'):
1101             self.list_thumbnails(info_dict)
1102             return
1103
1104         req_format = self.params.get('format')
1105         if req_format is None:
1106             req_format_list = []
1107             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1108                     info_dict['extractor'] in ['youtube', 'ted']):
1109                 merger = FFmpegMergerPP(self)
1110                 if merger.available and merger.can_merge():
1111                     req_format_list.append('bestvideo+bestaudio')
1112             req_format_list.append('best')
1113             req_format = '/'.join(req_format_list)
1114         formats_to_download = []
1115         if req_format == 'all':
1116             formats_to_download = formats
1117         else:
1118             for rfstr in req_format.split(','):
1119                 # We can accept formats requested in the format: 34/5/best, we pick
1120                 # the first that is available, starting from left
1121                 req_formats = rfstr.split('/')
1122                 for rf in req_formats:
1123                     if re.match(r'.+?\+.+?', rf) is not None:
1124                         # Two formats have been requested like '137+139'
1125                         format_1, format_2 = rf.split('+')
1126                         formats_info = (self.select_format(format_1, formats),
1127                                         self.select_format(format_2, formats))
1128                         if all(formats_info):
1129                             # The first format must contain the video and the
1130                             # second the audio
1131                             if formats_info[0].get('vcodec') == 'none':
1132                                 self.report_error('The first format must '
1133                                                   'contain the video, try using '
1134                                                   '"-f %s+%s"' % (format_2, format_1))
1135                                 return
1136                             output_ext = (
1137                                 formats_info[0]['ext']
1138                                 if self.params.get('merge_output_format') is None
1139                                 else self.params['merge_output_format'])
1140                             selected_format = {
1141                                 'requested_formats': formats_info,
1142                                 'format': '%s+%s' % (formats_info[0].get('format'),
1143                                                      formats_info[1].get('format')),
1144                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1145                                                         formats_info[1].get('format_id')),
1146                                 'width': formats_info[0].get('width'),
1147                                 'height': formats_info[0].get('height'),
1148                                 'resolution': formats_info[0].get('resolution'),
1149                                 'fps': formats_info[0].get('fps'),
1150                                 'vcodec': formats_info[0].get('vcodec'),
1151                                 'vbr': formats_info[0].get('vbr'),
1152                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1153                                 'acodec': formats_info[1].get('acodec'),
1154                                 'abr': formats_info[1].get('abr'),
1155                                 'ext': output_ext,
1156                             }
1157                         else:
1158                             selected_format = None
1159                     else:
1160                         selected_format = self.select_format(rf, formats)
1161                     if selected_format is not None:
1162                         formats_to_download.append(selected_format)
1163                         break
1164         if not formats_to_download:
1165             raise ExtractorError('requested format not available',
1166                                  expected=True)
1167
1168         if download:
1169             if len(formats_to_download) > 1:
1170                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1171             for format in formats_to_download:
1172                 new_info = dict(info_dict)
1173                 new_info.update(format)
1174                 self.process_info(new_info)
1175         # We update the info dict with the best quality format (backwards compatibility)
1176         info_dict.update(formats_to_download[-1])
1177         return info_dict
1178
1179     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1180         """Select the requested subtitles and their format"""
1181         available_subs = {}
1182         if normal_subtitles and self.params.get('writesubtitles'):
1183             available_subs.update(normal_subtitles)
1184         if automatic_captions and self.params.get('writeautomaticsub'):
1185             for lang, cap_info in automatic_captions.items():
1186                 if lang not in available_subs:
1187                     available_subs[lang] = cap_info
1188
1189         if (not self.params.get('writesubtitles') and not
1190                 self.params.get('writeautomaticsub') or not
1191                 available_subs):
1192             return None
1193
1194         if self.params.get('allsubtitles', False):
1195             requested_langs = available_subs.keys()
1196         else:
1197             if self.params.get('subtitleslangs', False):
1198                 requested_langs = self.params.get('subtitleslangs')
1199             elif 'en' in available_subs:
1200                 requested_langs = ['en']
1201             else:
1202                 requested_langs = [list(available_subs.keys())[0]]
1203
1204         formats_query = self.params.get('subtitlesformat', 'best')
1205         formats_preference = formats_query.split('/') if formats_query else []
1206         subs = {}
1207         for lang in requested_langs:
1208             formats = available_subs.get(lang)
1209             if formats is None:
1210                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1211                 continue
1212             for ext in formats_preference:
1213                 if ext == 'best':
1214                     f = formats[-1]
1215                     break
1216                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1217                 if matches:
1218                     f = matches[-1]
1219                     break
1220             else:
1221                 f = formats[-1]
1222                 self.report_warning(
1223                     'No subtitle format found matching "%s" for language %s, '
1224                     'using %s' % (formats_query, lang, f['ext']))
1225             subs[lang] = f
1226         return subs
1227
1228     def process_info(self, info_dict):
1229         """Process a single resolved IE result."""
1230
1231         assert info_dict.get('_type', 'video') == 'video'
1232
1233         max_downloads = self.params.get('max_downloads')
1234         if max_downloads is not None:
1235             if self._num_downloads >= int(max_downloads):
1236                 raise MaxDownloadsReached()
1237
1238         info_dict['fulltitle'] = info_dict['title']
1239         if len(info_dict['title']) > 200:
1240             info_dict['title'] = info_dict['title'][:197] + '...'
1241
1242         if 'format' not in info_dict:
1243             info_dict['format'] = info_dict['ext']
1244
1245         reason = self._match_entry(info_dict, incomplete=False)
1246         if reason is not None:
1247             self.to_screen('[download] ' + reason)
1248             return
1249
1250         self._num_downloads += 1
1251
1252         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1253
1254         # Forced printings
1255         if self.params.get('forcetitle', False):
1256             self.to_stdout(info_dict['fulltitle'])
1257         if self.params.get('forceid', False):
1258             self.to_stdout(info_dict['id'])
1259         if self.params.get('forceurl', False):
1260             if info_dict.get('requested_formats') is not None:
1261                 for f in info_dict['requested_formats']:
1262                     self.to_stdout(f['url'] + f.get('play_path', ''))
1263             else:
1264                 # For RTMP URLs, also include the playpath
1265                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1266         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1267             self.to_stdout(info_dict['thumbnail'])
1268         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1269             self.to_stdout(info_dict['description'])
1270         if self.params.get('forcefilename', False) and filename is not None:
1271             self.to_stdout(filename)
1272         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1273             self.to_stdout(formatSeconds(info_dict['duration']))
1274         if self.params.get('forceformat', False):
1275             self.to_stdout(info_dict['format'])
1276         if self.params.get('forcejson', False):
1277             self.to_stdout(json.dumps(info_dict))
1278
1279         # Do nothing else if in simulate mode
1280         if self.params.get('simulate', False):
1281             return
1282
1283         if filename is None:
1284             return
1285
1286         try:
1287             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1288             if dn and not os.path.exists(dn):
1289                 os.makedirs(dn)
1290         except (OSError, IOError) as err:
1291             self.report_error('unable to create directory ' + compat_str(err))
1292             return
1293
1294         if self.params.get('writedescription', False):
1295             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1296             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1297                 self.to_screen('[info] Video description is already present')
1298             elif info_dict.get('description') is None:
1299                 self.report_warning('There\'s no description to write.')
1300             else:
1301                 try:
1302                     self.to_screen('[info] Writing video description to: ' + descfn)
1303                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1304                         descfile.write(info_dict['description'])
1305                 except (OSError, IOError):
1306                     self.report_error('Cannot write description file ' + descfn)
1307                     return
1308
1309         if self.params.get('writeannotations', False):
1310             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1311             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1312                 self.to_screen('[info] Video annotations are already present')
1313             else:
1314                 try:
1315                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1316                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1317                         annofile.write(info_dict['annotations'])
1318                 except (KeyError, TypeError):
1319                     self.report_warning('There are no annotations to write.')
1320                 except (OSError, IOError):
1321                     self.report_error('Cannot write annotations file: ' + annofn)
1322                     return
1323
1324         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1325                                        self.params.get('writeautomaticsub')])
1326
1327         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1328             # subtitles download errors are already managed as troubles in relevant IE
1329             # that way it will silently go on when used with unsupporting IE
1330             subtitles = info_dict['requested_subtitles']
1331             ie = self.get_info_extractor(info_dict['extractor_key'])
1332             for sub_lang, sub_info in subtitles.items():
1333                 sub_format = sub_info['ext']
1334                 if sub_info.get('data') is not None:
1335                     sub_data = sub_info['data']
1336                 else:
1337                     try:
1338                         sub_data = ie._download_webpage(
1339                             sub_info['url'], info_dict['id'], note=False)
1340                     except ExtractorError as err:
1341                         self.report_warning('Unable to download subtitle for "%s": %s' %
1342                                             (sub_lang, compat_str(err.cause)))
1343                         continue
1344                 try:
1345                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1346                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1347                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1348                     else:
1349                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1350                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1351                             subfile.write(sub_data)
1352                 except (OSError, IOError):
1353                     self.report_error('Cannot write subtitles file ' + sub_filename)
1354                     return
1355
1356         if self.params.get('writeinfojson', False):
1357             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1358             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1359                 self.to_screen('[info] Video description metadata is already present')
1360             else:
1361                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1362                 try:
1363                     write_json_file(self.filter_requested_info(info_dict), infofn)
1364                 except (OSError, IOError):
1365                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1366                     return
1367
1368         self._write_thumbnails(info_dict, filename)
1369
1370         if not self.params.get('skip_download', False):
1371             try:
1372                 def dl(name, info):
1373                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1374                     for ph in self._progress_hooks:
1375                         fd.add_progress_hook(ph)
1376                     if self.params.get('verbose'):
1377                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1378                     return fd.download(name, info)
1379
1380                 if info_dict.get('requested_formats') is not None:
1381                     downloaded = []
1382                     success = True
1383                     merger = FFmpegMergerPP(self)
1384                     if not merger.available:
1385                         postprocessors = []
1386                         self.report_warning('You have requested multiple '
1387                                             'formats but ffmpeg or avconv are not installed.'
1388                                             ' The formats won\'t be merged.')
1389                     else:
1390                         postprocessors = [merger]
1391
1392                     def compatible_formats(formats):
1393                         video, audio = formats
1394                         # Check extension
1395                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1396                         if video_ext and audio_ext:
1397                             COMPATIBLE_EXTS = (
1398                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1399                                 ('webm')
1400                             )
1401                             for exts in COMPATIBLE_EXTS:
1402                                 if video_ext in exts and audio_ext in exts:
1403                                     return True
1404                         # TODO: Check acodec/vcodec
1405                         return False
1406
1407                     filename_real_ext = os.path.splitext(filename)[1][1:]
1408                     filename_wo_ext = (
1409                         os.path.splitext(filename)[0]
1410                         if filename_real_ext == info_dict['ext']
1411                         else filename)
1412                     requested_formats = info_dict['requested_formats']
1413                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1414                         info_dict['ext'] = 'mkv'
1415                         self.report_warning(
1416                             'Requested formats are incompatible for merge and will be merged into mkv.')
1417                     # Ensure filename always has a correct extension for successful merge
1418                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1419                     if os.path.exists(encodeFilename(filename)):
1420                         self.to_screen(
1421                             '[download] %s has already been downloaded and '
1422                             'merged' % filename)
1423                     else:
1424                         for f in requested_formats:
1425                             new_info = dict(info_dict)
1426                             new_info.update(f)
1427                             fname = self.prepare_filename(new_info)
1428                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1429                             downloaded.append(fname)
1430                             partial_success = dl(fname, new_info)
1431                             success = success and partial_success
1432                         info_dict['__postprocessors'] = postprocessors
1433                         info_dict['__files_to_merge'] = downloaded
1434                 else:
1435                     # Just a single file
1436                     success = dl(filename, info_dict)
1437             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1438                 self.report_error('unable to download video data: %s' % str(err))
1439                 return
1440             except (OSError, IOError) as err:
1441                 raise UnavailableVideoError(err)
1442             except (ContentTooShortError, ) as err:
1443                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1444                 return
1445
1446             if success:
1447                 # Fixup content
1448                 fixup_policy = self.params.get('fixup')
1449                 if fixup_policy is None:
1450                     fixup_policy = 'detect_or_warn'
1451
1452                 stretched_ratio = info_dict.get('stretched_ratio')
1453                 if stretched_ratio is not None and stretched_ratio != 1:
1454                     if fixup_policy == 'warn':
1455                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1456                             info_dict['id'], stretched_ratio))
1457                     elif fixup_policy == 'detect_or_warn':
1458                         stretched_pp = FFmpegFixupStretchedPP(self)
1459                         if stretched_pp.available:
1460                             info_dict.setdefault('__postprocessors', [])
1461                             info_dict['__postprocessors'].append(stretched_pp)
1462                         else:
1463                             self.report_warning(
1464                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1465                                     info_dict['id'], stretched_ratio))
1466                     else:
1467                         assert fixup_policy in ('ignore', 'never')
1468
1469                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1470                     if fixup_policy == 'warn':
1471                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1472                             info_dict['id']))
1473                     elif fixup_policy == 'detect_or_warn':
1474                         fixup_pp = FFmpegFixupM4aPP(self)
1475                         if fixup_pp.available:
1476                             info_dict.setdefault('__postprocessors', [])
1477                             info_dict['__postprocessors'].append(fixup_pp)
1478                         else:
1479                             self.report_warning(
1480                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1481                                     info_dict['id']))
1482                     else:
1483                         assert fixup_policy in ('ignore', 'never')
1484
1485                 try:
1486                     self.post_process(filename, info_dict)
1487                 except (PostProcessingError) as err:
1488                     self.report_error('postprocessing: %s' % str(err))
1489                     return
1490                 self.record_download_archive(info_dict)
1491
1492     def download(self, url_list):
1493         """Download a given list of URLs."""
1494         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1495         if (len(url_list) > 1 and
1496                 '%' not in outtmpl and
1497                 self.params.get('max_downloads') != 1):
1498             raise SameFileError(outtmpl)
1499
1500         for url in url_list:
1501             try:
1502                 # It also downloads the videos
1503                 res = self.extract_info(url)
1504             except UnavailableVideoError:
1505                 self.report_error('unable to download video')
1506             except MaxDownloadsReached:
1507                 self.to_screen('[info] Maximum number of downloaded files reached.')
1508                 raise
1509             else:
1510                 if self.params.get('dump_single_json', False):
1511                     self.to_stdout(json.dumps(res))
1512
1513         return self._download_retcode
1514
1515     def download_with_info_file(self, info_filename):
1516         with contextlib.closing(fileinput.FileInput(
1517                 [info_filename], mode='r',
1518                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1519             # FileInput doesn't have a read method, we can't call json.load
1520             info = self.filter_requested_info(json.loads('\n'.join(f)))
1521         try:
1522             self.process_ie_result(info, download=True)
1523         except DownloadError:
1524             webpage_url = info.get('webpage_url')
1525             if webpage_url is not None:
1526                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1527                 return self.download([webpage_url])
1528             else:
1529                 raise
1530         return self._download_retcode
1531
1532     @staticmethod
1533     def filter_requested_info(info_dict):
1534         return dict(
1535             (k, v) for k, v in info_dict.items()
1536             if k not in ['requested_formats', 'requested_subtitles'])
1537
1538     def post_process(self, filename, ie_info):
1539         """Run all the postprocessors on the given file."""
1540         info = dict(ie_info)
1541         info['filepath'] = filename
1542         pps_chain = []
1543         if ie_info.get('__postprocessors') is not None:
1544             pps_chain.extend(ie_info['__postprocessors'])
1545         pps_chain.extend(self._pps)
1546         for pp in pps_chain:
1547             files_to_delete = []
1548             try:
1549                 files_to_delete, info = pp.run(info)
1550             except PostProcessingError as e:
1551                 self.report_error(e.msg)
1552             if files_to_delete and not self.params.get('keepvideo', False):
1553                 for old_filename in files_to_delete:
1554                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1555                     try:
1556                         os.remove(encodeFilename(old_filename))
1557                     except (IOError, OSError):
1558                         self.report_warning('Unable to remove downloaded original file')
1559
1560     def _make_archive_id(self, info_dict):
1561         # Future-proof against any change in case
1562         # and backwards compatibility with prior versions
1563         extractor = info_dict.get('extractor_key')
1564         if extractor is None:
1565             if 'id' in info_dict:
1566                 extractor = info_dict.get('ie_key')  # key in a playlist
1567         if extractor is None:
1568             return None  # Incomplete video information
1569         return extractor.lower() + ' ' + info_dict['id']
1570
1571     def in_download_archive(self, info_dict):
1572         fn = self.params.get('download_archive')
1573         if fn is None:
1574             return False
1575
1576         vid_id = self._make_archive_id(info_dict)
1577         if vid_id is None:
1578             return False  # Incomplete video information
1579
1580         try:
1581             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1582                 for line in archive_file:
1583                     if line.strip() == vid_id:
1584                         return True
1585         except IOError as ioe:
1586             if ioe.errno != errno.ENOENT:
1587                 raise
1588         return False
1589
1590     def record_download_archive(self, info_dict):
1591         fn = self.params.get('download_archive')
1592         if fn is None:
1593             return
1594         vid_id = self._make_archive_id(info_dict)
1595         assert vid_id
1596         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1597             archive_file.write(vid_id + '\n')
1598
1599     @staticmethod
1600     def format_resolution(format, default='unknown'):
1601         if format.get('vcodec') == 'none':
1602             return 'audio only'
1603         if format.get('resolution') is not None:
1604             return format['resolution']
1605         if format.get('height') is not None:
1606             if format.get('width') is not None:
1607                 res = '%sx%s' % (format['width'], format['height'])
1608             else:
1609                 res = '%sp' % format['height']
1610         elif format.get('width') is not None:
1611             res = '?x%d' % format['width']
1612         else:
1613             res = default
1614         return res
1615
1616     def _format_note(self, fdict):
1617         res = ''
1618         if fdict.get('ext') in ['f4f', 'f4m']:
1619             res += '(unsupported) '
1620         if fdict.get('format_note') is not None:
1621             res += fdict['format_note'] + ' '
1622         if fdict.get('tbr') is not None:
1623             res += '%4dk ' % fdict['tbr']
1624         if fdict.get('container') is not None:
1625             if res:
1626                 res += ', '
1627             res += '%s container' % fdict['container']
1628         if (fdict.get('vcodec') is not None and
1629                 fdict.get('vcodec') != 'none'):
1630             if res:
1631                 res += ', '
1632             res += fdict['vcodec']
1633             if fdict.get('vbr') is not None:
1634                 res += '@'
1635         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1636             res += 'video@'
1637         if fdict.get('vbr') is not None:
1638             res += '%4dk' % fdict['vbr']
1639         if fdict.get('fps') is not None:
1640             res += ', %sfps' % fdict['fps']
1641         if fdict.get('acodec') is not None:
1642             if res:
1643                 res += ', '
1644             if fdict['acodec'] == 'none':
1645                 res += 'video only'
1646             else:
1647                 res += '%-5s' % fdict['acodec']
1648         elif fdict.get('abr') is not None:
1649             if res:
1650                 res += ', '
1651             res += 'audio'
1652         if fdict.get('abr') is not None:
1653             res += '@%3dk' % fdict['abr']
1654         if fdict.get('asr') is not None:
1655             res += ' (%5dHz)' % fdict['asr']
1656         if fdict.get('filesize') is not None:
1657             if res:
1658                 res += ', '
1659             res += format_bytes(fdict['filesize'])
1660         elif fdict.get('filesize_approx') is not None:
1661             if res:
1662                 res += ', '
1663             res += '~' + format_bytes(fdict['filesize_approx'])
1664         return res
1665
1666     def list_formats(self, info_dict):
1667         formats = info_dict.get('formats', [info_dict])
1668         table = [
1669             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1670             for f in formats
1671             if f.get('preference') is None or f['preference'] >= -1000]
1672         if len(formats) > 1:
1673             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1674
1675         header_line = ['format code', 'extension', 'resolution', 'note']
1676         self.to_screen(
1677             '[info] Available formats for %s:\n%s' %
1678             (info_dict['id'], render_table(header_line, table)))
1679
1680     def list_thumbnails(self, info_dict):
1681         thumbnails = info_dict.get('thumbnails')
1682         if not thumbnails:
1683             tn_url = info_dict.get('thumbnail')
1684             if tn_url:
1685                 thumbnails = [{'id': '0', 'url': tn_url}]
1686             else:
1687                 self.to_screen(
1688                     '[info] No thumbnails present for %s' % info_dict['id'])
1689                 return
1690
1691         self.to_screen(
1692             '[info] Thumbnails for %s:' % info_dict['id'])
1693         self.to_screen(render_table(
1694             ['ID', 'width', 'height', 'URL'],
1695             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1696
1697     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1698         if not subtitles:
1699             self.to_screen('%s has no %s' % (video_id, name))
1700             return
1701         self.to_screen(
1702             'Available %s for %s:' % (name, video_id))
1703         self.to_screen(render_table(
1704             ['Language', 'formats'],
1705             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1706                 for lang, formats in subtitles.items()]))
1707
1708     def urlopen(self, req):
1709         """ Start an HTTP download """
1710
1711         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1712         # always respected by websites, some tend to give out URLs with non percent-encoded
1713         # non-ASCII characters (see telemb.py, ard.py [#3412])
1714         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1715         # To work around aforementioned issue we will replace request's original URL with
1716         # percent-encoded one
1717         req_is_string = isinstance(req, compat_basestring)
1718         url = req if req_is_string else req.get_full_url()
1719         url_escaped = escape_url(url)
1720
1721         # Substitute URL if any change after escaping
1722         if url != url_escaped:
1723             if req_is_string:
1724                 req = url_escaped
1725             else:
1726                 req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
1727                 req = req_type(
1728                     url_escaped, data=req.data, headers=req.headers,
1729                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1730
1731         return self._opener.open(req, timeout=self._socket_timeout)
1732
1733     def print_debug_header(self):
1734         if not self.params.get('verbose'):
1735             return
1736
1737         if type('') is not compat_str:
1738             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1739             self.report_warning(
1740                 'Your Python is broken! Update to a newer and supported version')
1741
1742         stdout_encoding = getattr(
1743             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1744         encoding_str = (
1745             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1746                 locale.getpreferredencoding(),
1747                 sys.getfilesystemencoding(),
1748                 stdout_encoding,
1749                 self.get_encoding()))
1750         write_string(encoding_str, encoding=None)
1751
1752         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1753         try:
1754             sp = subprocess.Popen(
1755                 ['git', 'rev-parse', '--short', 'HEAD'],
1756                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1757                 cwd=os.path.dirname(os.path.abspath(__file__)))
1758             out, err = sp.communicate()
1759             out = out.decode().strip()
1760             if re.match('[0-9a-f]+', out):
1761                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1762         except Exception:
1763             try:
1764                 sys.exc_clear()
1765             except Exception:
1766                 pass
1767         self._write_string('[debug] Python version %s - %s\n' % (
1768             platform.python_version(), platform_name()))
1769
1770         exe_versions = FFmpegPostProcessor.get_versions(self)
1771         exe_versions['rtmpdump'] = rtmpdump_version()
1772         exe_str = ', '.join(
1773             '%s %s' % (exe, v)
1774             for exe, v in sorted(exe_versions.items())
1775             if v
1776         )
1777         if not exe_str:
1778             exe_str = 'none'
1779         self._write_string('[debug] exe versions: %s\n' % exe_str)
1780
1781         proxy_map = {}
1782         for handler in self._opener.handlers:
1783             if hasattr(handler, 'proxies'):
1784                 proxy_map.update(handler.proxies)
1785         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1786
1787         if self.params.get('call_home', False):
1788             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1789             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1790             latest_version = self.urlopen(
1791                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1792             if version_tuple(latest_version) > version_tuple(__version__):
1793                 self.report_warning(
1794                     'You are using an outdated version (newest version: %s)! '
1795                     'See https://yt-dl.org/update if you need help updating.' %
1796                     latest_version)
1797
1798     def _setup_opener(self):
1799         timeout_val = self.params.get('socket_timeout')
1800         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1801
1802         opts_cookiefile = self.params.get('cookiefile')
1803         opts_proxy = self.params.get('proxy')
1804
1805         if opts_cookiefile is None:
1806             self.cookiejar = compat_cookiejar.CookieJar()
1807         else:
1808             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1809                 opts_cookiefile)
1810             if os.access(opts_cookiefile, os.R_OK):
1811                 self.cookiejar.load()
1812
1813         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1814             self.cookiejar)
1815         if opts_proxy is not None:
1816             if opts_proxy == '':
1817                 proxies = {}
1818             else:
1819                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1820         else:
1821             proxies = compat_urllib_request.getproxies()
1822             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1823             if 'http' in proxies and 'https' not in proxies:
1824                 proxies['https'] = proxies['http']
1825         proxy_handler = PerRequestProxyHandler(proxies)
1826
1827         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1828         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1829         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1830         opener = compat_urllib_request.build_opener(
1831             proxy_handler, https_handler, cookie_processor, ydlh)
1832
1833         # Delete the default user-agent header, which would otherwise apply in
1834         # cases where our custom HTTP handler doesn't come into play
1835         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1836         opener.addheaders = []
1837         self._opener = opener
1838
1839     def encode(self, s):
1840         if isinstance(s, bytes):
1841             return s  # Already encoded
1842
1843         try:
1844             return s.encode(self.get_encoding())
1845         except UnicodeEncodeError as err:
1846             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1847             raise
1848
1849     def get_encoding(self):
1850         encoding = self.params.get('encoding')
1851         if encoding is None:
1852             encoding = preferredencoding()
1853         return encoding
1854
1855     def _write_thumbnails(self, info_dict, filename):
1856         if self.params.get('writethumbnail', False):
1857             thumbnails = info_dict.get('thumbnails')
1858             if thumbnails:
1859                 thumbnails = [thumbnails[-1]]
1860         elif self.params.get('write_all_thumbnails', False):
1861             thumbnails = info_dict.get('thumbnails')
1862         else:
1863             return
1864
1865         if not thumbnails:
1866             # No thumbnails present, so return immediately
1867             return
1868
1869         for t in thumbnails:
1870             thumb_ext = determine_ext(t['url'], 'jpg')
1871             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1872             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1873             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1874
1875             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1876                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1877                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1878             else:
1879                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1880                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1881                 try:
1882                     uf = self.urlopen(t['url'])
1883                     with open(thumb_filename, 'wb') as thumbf:
1884                         shutil.copyfileobj(uf, thumbf)
1885                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1886                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1887                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1888                     self.report_warning('Unable to download thumbnail "%s": %s' %
1889                                         (t['url'], compat_str(err)))