youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import io
  12 import itertools
  13 import json
  14 import locale
  15 import operator
  16 import os
  17 import platform
  18 import re
  19 import shutil
  20 import subprocess
  21 import socket
  22 import sys
  23 import time
  24 import traceback
  25
  26 if os.name == 'nt':
  27     import ctypes
  28
  29 from .compat import (
  30     compat_basestring,
  31     compat_cookiejar,
  32     compat_expanduser,
  33     compat_get_terminal_size,
  34     compat_http_client,
  35     compat_kwargs,
  36     compat_str,
  37     compat_urllib_error,
  38     compat_urllib_request,
  39 )
  40 from .utils import (
  41     escape_url,
  42     ContentTooShortError,
  43     date_from_str,
  44     DateRange,
  45     DEFAULT_OUTTMPL,
  46     determine_ext,
  47     DownloadError,
  48     encodeFilename,
  49     ExtractorError,
  50     format_bytes,
  51     formatSeconds,
  52     HEADRequest,
  53     locked_file,
  54     make_HTTPS_handler,
  55     MaxDownloadsReached,
  56     PagedList,
  57     parse_filesize,
  58     PerRequestProxyHandler,
  59     PostProcessingError,
  60     platform_name,
  61     preferredencoding,
  62     render_table,
  63     SameFileError,
  64     sanitize_filename,
  65     sanitize_path,
  66     std_headers,
  67     subtitles_filename,
  68     UnavailableVideoError,
  69     url_basename,
  70     version_tuple,
  71     write_json_file,
  72     write_string,
  73     YoutubeDLHandler,
  74     prepend_extension,
  75     replace_extension,
  76     args_to_str,
  77     age_restricted,
  78 )
  79 from .cache import Cache
  80 from .extractor import get_info_extractor, gen_extractors
  81 from .downloader import get_suitable_downloader
  82 from .downloader.rtmp import rtmpdump_version
  83 from .postprocessor import (
  84     FFmpegFixupM4aPP,
  85     FFmpegFixupStretchedPP,
  86     FFmpegMergerPP,
  87     FFmpegPostProcessor,
  88     get_postprocessor,
  89 )
  90 from .version import __version__
  91
  92
  93 class YoutubeDL(object):
  94     """YoutubeDL class.
  95
    YoutubeDL objects are the ones responsible for downloading the
  97     actual video file and writing it to disk if the user has requested
  98     it, among some other tasks. In most cases there should be one per
  99     program. As, given a video URL, the downloader doesn't know how to
 100     extract all the needed information, task that InfoExtractors do, it
 101     has to pass the URL to one of them.
 102
 103     For this, YoutubeDL objects have a method that allows
 104     InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
 106     finds that reports being able to handle it. The InfoExtractor extracts
 107     all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
 109     Downloader to download the video.
 110
 111     YoutubeDL objects accept a lot of parameters. In order not to saturate
 112     the object constructor with arguments, it receives a dictionary of
 113     options instead. These options are available through the params
 114     attribute for the InfoExtractors to use. The YoutubeDL also
 115     registers itself as the downloader in charge for the InfoExtractors
 116     that are added to it, so this is a "mutual registration".
 117
 118     Available options:
 119
 120     username:          Username for authentication purposes.
 121     password:          Password for authentication purposes.
 122     videopassword:     Password for accessing a video.
 123     usenetrc:          Use netrc for authentication instead.
 124     verbose:           Print additional info to stdout.
 125     quiet:             Do not print messages to stdout.
 126     no_warnings:       Do not print out anything for warnings.
 127     forceurl:          Force printing final URL.
 128     forcetitle:        Force printing title.
 129     forceid:           Force printing ID.
 130     forcethumbnail:    Force printing thumbnail URL.
 131     forcedescription:  Force printing description.
 132     forcefilename:     Force printing final filename.
 133     forceduration:     Force printing duration.
 134     forcejson:         Force printing info_dict as JSON.
 135     dump_single_json:  Force printing the info_dict of the whole playlist
 136                        (or video) as a single JSON line.
 137     simulate:          Do not download the video files.
 138     format:            Video format code. See options.py for more information.
 139     outtmpl:           Template for output names.
 140     restrictfilenames: Do not allow "&" and spaces in file names
 141     ignoreerrors:      Do not stop on download errors.
 142     nooverwrites:      Prevent overwriting files.
 143     playliststart:     Playlist item to start at.
 144     playlistend:       Playlist item to end at.
 145     playlist_items:    Specific indices of playlist to download.
 146     playlistreverse:   Download playlist items in reverse order.
 147     matchtitle:        Download only matching titles.
 148     rejecttitle:       Reject downloads for matching titles.
 149     logger:            Log messages to a logging.Logger instance.
 150     logtostderr:       Log messages to stderr instead of stdout.
 151     writedescription:  Write the video description to a .description file
 152     writeinfojson:     Write the video description to a .info.json file
 153     writeannotations:  Write the video annotations to a .annotations.xml file
 154     writethumbnail:    Write the thumbnail image to a file
 155     write_all_thumbnails:  Write all thumbnail formats to files
 156     writesubtitles:    Write the video subtitles to a file
 157     writeautomaticsub: Write the automatic subtitles to a file
 158     allsubtitles:      Downloads all the subtitles of the video
 159                        (requires writesubtitles or writeautomaticsub)
 160     listsubtitles:     Lists all available subtitles for the video
 161     subtitlesformat:   The format code for subtitles
 162     subtitleslangs:    List of languages of the subtitles to download
 163     keepvideo:         Keep the video file after post-processing
 164     daterange:         A DateRange object, download only if the upload_date is in the range.
 165     skip_download:     Skip the actual download of the video file
 166     cachedir:          Location of the cache files in the filesystem.
 167                        False to disable filesystem cache.
 168     noplaylist:        Download single video instead of a playlist if in doubt.
 169     age_limit:         An integer representing the user's age in years.
 170                        Unsuitable videos for the given age are skipped.
 171     min_views:         An integer representing the minimum view count the video
 172                        must have in order to not be skipped.
 173                        Videos without view count information are always
 174                        downloaded. None for no limit.
 175     max_views:         An integer representing the maximum view count.
 176                        Videos that are more popular than that are not
 177                        downloaded.
 178                        Videos without view count information are always
 179                        downloaded. None for no limit.
 180     download_archive:  File name of a file where all downloads are recorded.
 181                        Videos already present in the file are not downloaded
 182                        again.
 183     cookiefile:        File name where cookies should be read from and dumped to.
 184     nocheckcertificate:Do not verify SSL certificates
 185     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 186                        At the moment, this is only supported by YouTube.
 187     proxy:             URL of the proxy server to use
 188     cn_verification_proxy:  URL of the proxy to use for IP address verification
 189                        on Chinese sites. (Experimental)
 190     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 191     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
 193     debug_printtraffic:Print out sent and received HTTP traffic
 194     include_ads:       Download ads as well
 195     default_search:    Prepend this string if an input url is not valid.
 196                        'auto' for elaborate guessing
 197     encoding:          Use this encoding instead of the system-specified.
 198     extract_flat:      Do not resolve URLs, return the immediate result.
 199                        Pass in 'in_playlist' to only show this behavior for
 200                        playlist items.
 201     postprocessors:    A list of dictionaries, each with an entry
 202                        * key:  The name of the postprocessor. See
 203                                youtube_dl/postprocessor/__init__.py for a list.
 204                        as well as any further keyword arguments for the
 205                        postprocessor.
 206     progress_hooks:    A list of functions that get called on download
 207                        progress, with a dictionary with the entries
 208                        * status: One of "downloading", "error", or "finished".
 209                                  Check this first and ignore unknown values.
 210
 211                        If status is one of "downloading", or "finished", the
 212                        following properties may also be present:
 213                        * filename: The final filename (always present)
 214                        * tmpfilename: The filename we're currently writing to
 215                        * downloaded_bytes: Bytes on disk
 216                        * total_bytes: Size of the whole file, None if unknown
 217                        * total_bytes_estimate: Guess of the eventual file size,
 218                                                None if unavailable.
 219                        * elapsed: The number of seconds since download started.
 220                        * eta: The estimated time in seconds, None if unknown
 221                        * speed: The download speed in bytes/second, None if
 222                                 unknown
 223                        * fragment_index: The counter of the currently
 224                                          downloaded video fragment.
 225                        * fragment_count: The number of fragments (= individual
 226                                          files that will be merged)
 227
 228                        Progress hooks are guaranteed to be called at least once
 229                        (with status "finished") if the download is successful.
 230     merge_output_format: Extension to use when merging formats.
 231     fixup:             Automatically correct known faults of the file.
 232                        One of:
 233                        - "never": do nothing
 234                        - "warn": only emit a warning
 235                        - "detect_or_warn": check whether we can do anything
 236                                            about it, warn otherwise (default)
 237     source_address:    (Experimental) Client-side IP address to bind to.
 238     call_home:         Boolean, true iff we are allowed to contact the
 239                        youtube-dl servers for debugging.
 240     sleep_interval:    Number of seconds to sleep before each download.
 241     listformats:       Print an overview of available video formats and exit.
 242     list_thumbnails:   Print a table of all thumbnails and exit.
 243     match_filter:      A function that gets called with the info_dict of
 244                        every video.
 245                        If it returns a message, the video is ignored.
 246                        If it returns None, the video is downloaded.
 247                        match_filter_func in utils.py is one example for this.
 248     no_color:          Do not emit color codes in output.
 249
 250     The following options determine which downloader is picked:
 251     external_downloader: Executable of the external downloader to call.
 252                        None or unset for standard (built-in) downloader.
 253     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
 254
 255     The following parameters are not used by YoutubeDL itself, they are used by
 256     the downloader (see youtube_dl/downloader/common.py):
 257     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 258     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 259     xattr_set_filesize, external_downloader_args.
 260
 261     The following options are used by the post processors:
 262     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 263                        otherwise prefer avconv.
 264     """
 265
    # Class-level placeholders. __init__ rebinds each of these names on the
    # instance (params, _ies, _pps, _download_retcode, _num_downloads and
    # _screen_file), so the values below are only seen on the class itself.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 272
 273     def __init__(self, params=None, auto_init=True):
 274         """Create a FileDownloader object with the given options."""
 275         if params is None:
 276             params = {}
 277         self._ies = []
 278         self._ies_instances = {}
 279         self._pps = []
 280         self._progress_hooks = []
 281         self._download_retcode = 0
 282         self._num_downloads = 0
 283         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 284         self._err_file = sys.stderr
 285         self.params = params
 286         self.cache = Cache(self)
 287
 288         if params.get('bidi_workaround', False):
 289             try:
 290                 import pty
 291                 master, slave = pty.openpty()
 292                 width = compat_get_terminal_size().columns
 293                 if width is None:
 294                     width_args = []
 295                 else:
 296                     width_args = ['-w', str(width)]
 297                 sp_kwargs = dict(
 298                     stdin=subprocess.PIPE,
 299                     stdout=slave,
 300                     stderr=self._err_file)
 301                 try:
 302                     self._output_process = subprocess.Popen(
 303                         ['bidiv'] + width_args, **sp_kwargs
 304                     )
 305                 except OSError:
 306                     self._output_process = subprocess.Popen(
 307                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 308                 self._output_channel = os.fdopen(master, 'rb')
 309             except OSError as ose:
 310                 if ose.errno == 2:
 311                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 312                 else:
 313                     raise
 314
 315         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 316                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
 317                 not params.get('restrictfilenames', False)):
 318             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 319             self.report_warning(
 320                 'Assuming --restrict-filenames since file system encoding '
 321                 'cannot encode all characters. '
 322                 'Set the LC_ALL environment variable to fix this.')
 323             self.params['restrictfilenames'] = True
 324
 325         if isinstance(params.get('outtmpl'), bytes):
 326             self.report_warning(
 327                 'Parameter outtmpl is bytes, but should be a unicode string. '
 328                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 329
 330         self._setup_opener()
 331
 332         if auto_init:
 333             self.print_debug_header()
 334             self.add_default_info_extractors()
 335
 336         for pp_def_raw in self.params.get('postprocessors', []):
 337             pp_class = get_postprocessor(pp_def_raw['key'])
 338             pp_def = dict(pp_def_raw)
 339             del pp_def['key']
 340             pp = pp_class(self, **compat_kwargs(pp_def))
 341             self.add_post_processor(pp)
 342
 343         for ph in self.params.get('progress_hooks', []):
 344             self.add_progress_hook(ph)
 345
 346     def warn_if_short_id(self, argv):
 347         # short YouTube ID starting with dash?
 348         idxs = [
 349             i for i, a in enumerate(argv)
 350             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 351         if idxs:
 352             correct_argv = (
 353                 ['youtube-dl'] +
 354                 [a for i, a in enumerate(argv) if i not in idxs] +
 355                 ['--'] + [argv[i] for i in idxs]
 356             )
 357             self.report_warning(
 358                 'Long argument string detected. '
 359                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 360                 args_to_str(correct_argv))
 361
 362     def add_info_extractor(self, ie):
 363         """Add an InfoExtractor object to the end of the list."""
 364         self._ies.append(ie)
 365         self._ies_instances[ie.ie_key()] = ie
 366         ie.set_downloader(self)
 367
 368     def get_info_extractor(self, ie_key):
 369         """
 370         Get an instance of an IE with name ie_key, it will try to get one from
 371         the _ies list, if there's no instance it will create a new one and add
 372         it to the extractor list.
 373         """
 374         ie = self._ies_instances.get(ie_key)
 375         if ie is None:
 376             ie = get_info_extractor(ie_key)()
 377             self.add_info_extractor(ie)
 378         return ie
 379
 380     def add_default_info_extractors(self):
 381         """
 382         Add the InfoExtractors returned by gen_extractors to the end of the list
 383         """
 384         for ie in gen_extractors():
 385             self.add_info_extractor(ie)
 386
 387     def add_post_processor(self, pp):
 388         """Add a PostProcessor object to the end of the chain."""
 389         self._pps.append(pp)
 390         pp.set_downloader(self)
 391
 392     def add_progress_hook(self, ph):
 393         """Add the progress hook (currently only for the file downloader)"""
 394         self._progress_hooks.append(ph)
 395
 396     def _bidi_workaround(self, message):
 397         if not hasattr(self, '_output_channel'):
 398             return message
 399
 400         assert hasattr(self, '_output_process')
 401         assert isinstance(message, compat_str)
 402         line_count = message.count('\n') + 1
 403         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 404         self._output_process.stdin.flush()
 405         res = ''.join(self._output_channel.readline().decode('utf-8')
 406                       for _ in range(line_count))
 407         return res[:-len('\n')]
 408
 409     def to_screen(self, message, skip_eol=False):
 410         """Print message to stdout if not in quiet mode."""
 411         return self.to_stdout(message, skip_eol, check_quiet=True)
 412
 413     def _write_string(self, s, out=None):
 414         write_string(s, out=out, encoding=self.params.get('encoding'))
 415
 416     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 417         """Print message to stdout if not in quiet mode."""
 418         if self.params.get('logger'):
 419             self.params['logger'].debug(message)
 420         elif not check_quiet or not self.params.get('quiet', False):
 421             message = self._bidi_workaround(message)
 422             terminator = ['\n', ''][skip_eol]
 423             output = message + terminator
 424
 425             self._write_string(output, self._screen_file)
 426
 427     def to_stderr(self, message):
 428         """Print message to stderr."""
 429         assert isinstance(message, compat_str)
 430         if self.params.get('logger'):
 431             self.params['logger'].error(message)
 432         else:
 433             message = self._bidi_workaround(message)
 434             output = message + '\n'
 435             self._write_string(output, self._err_file)
 436
 437     def to_console_title(self, message):
 438         if not self.params.get('consoletitle', False):
 439             return
 440         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 441             # c_wchar_p() might not be necessary if `message` is
 442             # already of type unicode()
 443             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 444         elif 'TERM' in os.environ:
 445             self._write_string('\033]0;%s\007' % message, self._screen_file)
 446
 447     def save_console_title(self):
 448         if not self.params.get('consoletitle', False):
 449             return
 450         if 'TERM' in os.environ:
 451             # Save the title on stack
 452             self._write_string('\033[22;0t', self._screen_file)
 453
 454     def restore_console_title(self):
 455         if not self.params.get('consoletitle', False):
 456             return
 457         if 'TERM' in os.environ:
 458             # Restore the title from stack
 459             self._write_string('\033[23;0t', self._screen_file)
 460
 461     def __enter__(self):
 462         self.save_console_title()
 463         return self
 464
 465     def __exit__(self, *args):
 466         self.restore_console_title()
 467
 468         if self.params.get('cookiefile') is not None:
 469             self.cookiejar.save()
 470
 471     def trouble(self, message=None, tb=None):
 472         """Determine action to take when a download problem appears.
 473
 474         Depending on if the downloader has been configured to ignore
 475         download errors or not, this method may throw an exception or
 476         not when errors are found, after printing the message.
 477
 478         tb, if given, is additional traceback information.
 479         """
 480         if message is not None:
 481             self.to_stderr(message)
 482         if self.params.get('verbose'):
 483             if tb is None:
 484                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 485                     tb = ''
 486                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 487                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 488                     tb += compat_str(traceback.format_exc())
 489                 else:
 490                     tb_data = traceback.format_list(traceback.extract_stack())
 491                     tb = ''.join(tb_data)
 492             self.to_stderr(tb)
 493         if not self.params.get('ignoreerrors', False):
 494             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 495                 exc_info = sys.exc_info()[1].exc_info
 496             else:
 497                 exc_info = sys.exc_info()
 498             raise DownloadError(message, exc_info)
 499         self._download_retcode = 1
 500
 501     def report_warning(self, message):
 502         '''
 503         Print the message to stderr, it will be prefixed with 'WARNING:'
 504         If stderr is a tty file the 'WARNING:' will be colored
 505         '''
 506         if self.params.get('logger') is not None:
 507             self.params['logger'].warning(message)
 508         else:
 509             if self.params.get('no_warnings'):
 510                 return
 511             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 512                 _msg_header = '\033[0;33mWARNING:\033[0m'
 513             else:
 514                 _msg_header = 'WARNING:'
 515             warning_message = '%s %s' % (_msg_header, message)
 516             self.to_stderr(warning_message)
 517
 518     def report_error(self, message, tb=None):
 519         '''
 520         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 521         in red if stderr is a tty file.
 522         '''
 523         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 524             _msg_header = '\033[0;31mERROR:\033[0m'
 525         else:
 526             _msg_header = 'ERROR:'
 527         error_message = '%s %s' % (_msg_header, message)
 528         self.trouble(error_message, tb)
 529
 530     def report_file_already_downloaded(self, file_name):
 531         """Report file has already been fully downloaded."""
 532         try:
 533             self.to_screen('[download] %s has already been downloaded' % file_name)
 534         except UnicodeEncodeError:
 535             self.to_screen('[download] The file has already been downloaded')
 536
 537     def prepare_filename(self, info_dict):
 538         """Generate the output filename."""
 539         try:
 540             template_dict = dict(info_dict)
 541
 542             template_dict['epoch'] = int(time.time())
 543             autonumber_size = self.params.get('autonumber_size')
 544             if autonumber_size is None:
 545                 autonumber_size = 5
 546             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 547             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 548             if template_dict.get('playlist_index') is not None:
 549                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 550             if template_dict.get('resolution') is None:
 551                 if template_dict.get('width') and template_dict.get('height'):
 552                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 553                 elif template_dict.get('height'):
 554                     template_dict['resolution'] = '%sp' % template_dict['height']
 555                 elif template_dict.get('width'):
 556                     template_dict['resolution'] = '?x%d' % template_dict['width']
 557
 558             sanitize = lambda k, v: sanitize_filename(
 559                 compat_str(v),
 560                 restricted=self.params.get('restrictfilenames'),
 561                 is_id=(k == 'id'))
 562             template_dict = dict((k, sanitize(k, v))
 563                                  for k, v in template_dict.items()
 564                                  if v is not None)
 565             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 566
 567             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
 568             tmpl = compat_expanduser(outtmpl)
 569             filename = tmpl % template_dict
 570             # Temporary fix for #4787
 571             # 'Treat' all problem characters by passing filename through preferredencoding
 572             # to workaround encoding issues with subprocess on python2 @ Windows
 573             if sys.version_info < (3, 0) and sys.platform == 'win32':
 574                 filename = encodeFilename(filename, True).decode(preferredencoding())
 575             return filename
 576         except ValueError as err:
 577             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 578             return None
 579
 580     def _match_entry(self, info_dict, incomplete):
 581         """ Returns None iff the file should be downloaded """
 582
 583         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 584         if 'title' in info_dict:
 585             # This can happen when we're just evaluating the playlist
 586             title = info_dict['title']
 587             matchtitle = self.params.get('matchtitle', False)
 588             if matchtitle:
 589                 if not re.search(matchtitle, title, re.IGNORECASE):
 590                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 591             rejecttitle = self.params.get('rejecttitle', False)
 592             if rejecttitle:
 593                 if re.search(rejecttitle, title, re.IGNORECASE):
 594                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 595         date = info_dict.get('upload_date', None)
 596         if date is not None:
 597             dateRange = self.params.get('daterange', DateRange())
 598             if date not in dateRange:
 599                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 600         view_count = info_dict.get('view_count', None)
 601         if view_count is not None:
 602             min_views = self.params.get('min_views')
 603             if min_views is not None and view_count < min_views:
 604                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 605             max_views = self.params.get('max_views')
 606             if max_views is not None and view_count > max_views:
 607                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 608         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 609             return 'Skipping "%s" because it is age restricted' % video_title
 610         if self.in_download_archive(info_dict):
 611             return '%s has already been recorded in archive' % video_title
 612
 613         if not incomplete:
 614             match_filter = self.params.get('match_filter')
 615             if match_filter is not None:
 616                 ret = match_filter(info_dict)
 617                 if ret is not None:
 618                     return ret
 619
 620         return None
 621
 622     @staticmethod
 623     def add_extra_info(info_dict, extra_info):
 624         '''Set the keys from extra_info in info dict if they are missing'''
 625         for key, value in extra_info.items():
 626             info_dict.setdefault(key, value)
 627
 628     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 629                      process=True):
 630         '''
 631         Returns a list with a dictionary for each video we find.
 632         If 'download', also downloads the videos.
 633         extra_info is a dict containing the extra values to add to each result
 634         '''
 635
 636         if ie_key:
 637             ies = [self.get_info_extractor(ie_key)]
 638         else:
 639             ies = self._ies
 640
 641         for ie in ies:
 642             if not ie.suitable(url):
 643                 continue
 644
 645             if not ie.working():
 646                 self.report_warning('The program functionality for this site has been marked as broken, '
 647                                     'and will probably not work.')
 648
 649             try:
 650                 ie_result = ie.extract(url)
 651                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 652                     break
 653                 if isinstance(ie_result, list):
 654                     # Backwards compatibility: old IE result format
 655                     ie_result = {
 656                         '_type': 'compat_list',
 657                         'entries': ie_result,
 658                     }
 659                 self.add_default_extra_info(ie_result, ie, url)
 660                 if process:
 661                     return self.process_ie_result(ie_result, download, extra_info)
 662                 else:
 663                     return ie_result
 664             except ExtractorError as de:  # An error we somewhat expected
 665                 self.report_error(compat_str(de), de.format_traceback())
 666                 break
 667             except MaxDownloadsReached:
 668                 raise
 669             except Exception as e:
 670                 if self.params.get('ignoreerrors', False):
 671                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 672                     break
 673                 else:
 674                     raise
 675         else:
 676             self.report_error('no suitable InfoExtractor for URL %s' % url)
 677
 678     def add_default_extra_info(self, ie_result, ie, url):
 679         self.add_extra_info(ie_result, {
 680             'extractor': ie.IE_NAME,
 681             'webpage_url': url,
 682             'webpage_url_basename': url_basename(url),
 683             'extractor_key': ie.ie_key(),
 684         })
 685
 686     def process_ie_result(self, ie_result, download=True, extra_info={}):
 687         """
 688         Take the result of the ie(may be modified) and resolve all unresolved
 689         references (URLs, playlist items).
 690
 691         It will also download the videos if 'download'.
 692         Returns the resolved ie_result.
 693         """
 694
 695         result_type = ie_result.get('_type', 'video')
 696
 697         if result_type in ('url', 'url_transparent'):
 698             extract_flat = self.params.get('extract_flat', False)
 699             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 700                     extract_flat is True):
 701                 if self.params.get('forcejson', False):
 702                     self.to_stdout(json.dumps(ie_result))
 703                 return ie_result
 704
 705         if result_type == 'video':
 706             self.add_extra_info(ie_result, extra_info)
 707             return self.process_video_result(ie_result, download=download)
 708         elif result_type == 'url':
 709             # We have to add extra_info to the results because it may be
 710             # contained in a playlist
 711             return self.extract_info(ie_result['url'],
 712                                      download,
 713                                      ie_key=ie_result.get('ie_key'),
 714                                      extra_info=extra_info)
 715         elif result_type == 'url_transparent':
 716             # Use the information from the embedding page
 717             info = self.extract_info(
 718                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 719                 extra_info=extra_info, download=False, process=False)
 720
 721             force_properties = dict(
 722                 (k, v) for k, v in ie_result.items() if v is not None)
 723             for f in ('_type', 'url'):
 724                 if f in force_properties:
 725                     del force_properties[f]
 726             new_result = info.copy()
 727             new_result.update(force_properties)
 728
 729             assert new_result.get('_type') != 'url_transparent'
 730
 731             return self.process_ie_result(
 732                 new_result, download=download, extra_info=extra_info)
 733         elif result_type == 'playlist' or result_type == 'multi_video':
 734             # We process each entry in the playlist
 735             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 736             self.to_screen('[download] Downloading playlist: %s' % playlist)
 737
 738             playlist_results = []
 739
 740             playliststart = self.params.get('playliststart', 1) - 1
 741             playlistend = self.params.get('playlistend', None)
 742             # For backwards compatibility, interpret -1 as whole list
 743             if playlistend == -1:
 744                 playlistend = None
 745
 746             playlistitems_str = self.params.get('playlist_items', None)
 747             playlistitems = None
 748             if playlistitems_str is not None:
 749                 def iter_playlistitems(format):
 750                     for string_segment in format.split(','):
 751                         if '-' in string_segment:
 752                             start, end = string_segment.split('-')
 753                             for item in range(int(start), int(end) + 1):
 754                                 yield int(item)
 755                         else:
 756                             yield int(string_segment)
 757                 playlistitems = iter_playlistitems(playlistitems_str)
 758
 759             ie_entries = ie_result['entries']
 760             if isinstance(ie_entries, list):
 761                 n_all_entries = len(ie_entries)
 762                 if playlistitems:
 763                     entries = [
 764                         ie_entries[i - 1] for i in playlistitems
 765                         if -n_all_entries <= i - 1 < n_all_entries]
 766                 else:
 767                     entries = ie_entries[playliststart:playlistend]
 768                 n_entries = len(entries)
 769                 self.to_screen(
 770                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 771                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 772             elif isinstance(ie_entries, PagedList):
 773                 if playlistitems:
 774                     entries = []
 775                     for item in playlistitems:
 776                         entries.extend(ie_entries.getslice(
 777                             item - 1, item
 778                         ))
 779                 else:
 780                     entries = ie_entries.getslice(
 781                         playliststart, playlistend)
 782                 n_entries = len(entries)
 783                 self.to_screen(
 784                     "[%s] playlist %s: Downloading %d videos" %
 785                     (ie_result['extractor'], playlist, n_entries))
 786             else:  # iterable
 787                 if playlistitems:
 788                     entry_list = list(ie_entries)
 789                     entries = [entry_list[i - 1] for i in playlistitems]
 790                 else:
 791                     entries = list(itertools.islice(
 792                         ie_entries, playliststart, playlistend))
 793                 n_entries = len(entries)
 794                 self.to_screen(
 795                     "[%s] playlist %s: Downloading %d videos" %
 796                     (ie_result['extractor'], playlist, n_entries))
 797
 798             if self.params.get('playlistreverse', False):
 799                 entries = entries[::-1]
 800
 801             for i, entry in enumerate(entries, 1):
 802                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 803                 extra = {
 804                     'n_entries': n_entries,
 805                     'playlist': playlist,
 806                     'playlist_id': ie_result.get('id'),
 807                     'playlist_title': ie_result.get('title'),
 808                     'playlist_index': i + playliststart,
 809                     'extractor': ie_result['extractor'],
 810                     'webpage_url': ie_result['webpage_url'],
 811                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 812                     'extractor_key': ie_result['extractor_key'],
 813                 }
 814
 815                 reason = self._match_entry(entry, incomplete=True)
 816                 if reason is not None:
 817                     self.to_screen('[download] ' + reason)
 818                     continue
 819
 820                 entry_result = self.process_ie_result(entry,
 821                                                       download=download,
 822                                                       extra_info=extra)
 823                 playlist_results.append(entry_result)
 824             ie_result['entries'] = playlist_results
 825             return ie_result
 826         elif result_type == 'compat_list':
 827             self.report_warning(
 828                 'Extractor %s returned a compat_list result. '
 829                 'It needs to be updated.' % ie_result.get('extractor'))
 830
 831             def _fixup(r):
 832                 self.add_extra_info(
 833                     r,
 834                     {
 835                         'extractor': ie_result['extractor'],
 836                         'webpage_url': ie_result['webpage_url'],
 837                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 838                         'extractor_key': ie_result['extractor_key'],
 839                     }
 840                 )
 841                 return r
 842             ie_result['entries'] = [
 843                 self.process_ie_result(_fixup(r), download, extra_info)
 844                 for r in ie_result['entries']
 845             ]
 846             return ie_result
 847         else:
 848             raise Exception('Invalid result type: %s' % result_type)
 849
 850     def _apply_format_filter(self, format_spec, available_formats):
 851         " Returns a tuple of the remaining format_spec and filtered formats "
 852
 853         OPERATORS = {
 854             '<': operator.lt,
 855             '<=': operator.le,
 856             '>': operator.gt,
 857             '>=': operator.ge,
 858             '=': operator.eq,
 859             '!=': operator.ne,
 860         }
 861         operator_rex = re.compile(r'''(?x)\s*\[
 862             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
 863             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 864             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 865             \]$
 866             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 867         m = operator_rex.search(format_spec)
 868         if m:
 869             try:
 870                 comparison_value = int(m.group('value'))
 871             except ValueError:
 872                 comparison_value = parse_filesize(m.group('value'))
 873                 if comparison_value is None:
 874                     comparison_value = parse_filesize(m.group('value') + 'B')
 875                 if comparison_value is None:
 876                     raise ValueError(
 877                         'Invalid value %r in format specification %r' % (
 878                             m.group('value'), format_spec))
 879             op = OPERATORS[m.group('op')]
 880
 881         if not m:
 882             STR_OPERATORS = {
 883                 '=': operator.eq,
 884                 '!=': operator.ne,
 885             }
 886             str_operator_rex = re.compile(r'''(?x)\s*\[
 887                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
 888                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
 889                 \s*(?P<value>[a-zA-Z0-9_-]+)
 890                 \s*\]$
 891                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
 892             m = str_operator_rex.search(format_spec)
 893             if m:
 894                 comparison_value = m.group('value')
 895                 op = STR_OPERATORS[m.group('op')]
 896
 897         if not m:
 898             raise ValueError('Invalid format specification %r' % format_spec)
 899
 900         def _filter(f):
 901             actual_value = f.get(m.group('key'))
 902             if actual_value is None:
 903                 return m.group('none_inclusive')
 904             return op(actual_value, comparison_value)
 905         new_formats = [f for f in available_formats if _filter(f)]
 906
 907         new_format_spec = format_spec[:-len(m.group(0))]
 908         if not new_format_spec:
 909             new_format_spec = 'best'
 910
 911         return (new_format_spec, new_formats)
 912
 913     def select_format(self, format_spec, available_formats):
 914         while format_spec.endswith(']'):
 915             format_spec, available_formats = self._apply_format_filter(
 916                 format_spec, available_formats)
 917         if not available_formats:
 918             return None
 919
 920         if format_spec in ['best', 'worst', None]:
 921             format_idx = 0 if format_spec == 'worst' else -1
 922             audiovideo_formats = [
 923                 f for f in available_formats
 924                 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
 925             if audiovideo_formats:
 926                 return audiovideo_formats[format_idx]
 927             # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
 928             elif (all(f.get('acodec') != 'none' for f in available_formats) or
 929                   all(f.get('vcodec') != 'none' for f in available_formats)):
 930                 return available_formats[format_idx]
 931         elif format_spec == 'bestaudio':
 932             audio_formats = [
 933                 f for f in available_formats
 934                 if f.get('vcodec') == 'none']
 935             if audio_formats:
 936                 return audio_formats[-1]
 937         elif format_spec == 'worstaudio':
 938             audio_formats = [
 939                 f for f in available_formats
 940                 if f.get('vcodec') == 'none']
 941             if audio_formats:
 942                 return audio_formats[0]
 943         elif format_spec == 'bestvideo':
 944             video_formats = [
 945                 f for f in available_formats
 946                 if f.get('acodec') == 'none']
 947             if video_formats:
 948                 return video_formats[-1]
 949         elif format_spec == 'worstvideo':
 950             video_formats = [
 951                 f for f in available_formats
 952                 if f.get('acodec') == 'none']
 953             if video_formats:
 954                 return video_formats[0]
 955         else:
 956             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 957             if format_spec in extensions:
 958                 filter_f = lambda f: f['ext'] == format_spec
 959             else:
 960                 filter_f = lambda f: f['format_id'] == format_spec
 961             matches = list(filter(filter_f, available_formats))
 962             if matches:
 963                 return matches[-1]
 964         return None
 965
 966     def _calc_headers(self, info_dict):
 967         res = std_headers.copy()
 968
 969         add_headers = info_dict.get('http_headers')
 970         if add_headers:
 971             res.update(add_headers)
 972
 973         cookies = self._calc_cookies(info_dict)
 974         if cookies:
 975             res['Cookie'] = cookies
 976
 977         return res
 978
 979     def _calc_cookies(self, info_dict):
 980         pr = compat_urllib_request.Request(info_dict['url'])
 981         self.cookiejar.add_cookie_header(pr)
 982         return pr.get_header('Cookie')
 983
 984     def process_video_result(self, info_dict, download=True):
 985         assert info_dict.get('_type', 'video') == 'video'
 986
 987         if 'id' not in info_dict:
 988             raise ExtractorError('Missing "id" field in extractor result')
 989         if 'title' not in info_dict:
 990             raise ExtractorError('Missing "title" field in extractor result')
 991
 992         if 'playlist' not in info_dict:
 993             # It isn't part of a playlist
 994             info_dict['playlist'] = None
 995             info_dict['playlist_index'] = None
 996
 997         thumbnails = info_dict.get('thumbnails')
 998         if thumbnails is None:
 999             thumbnail = info_dict.get('thumbnail')
1000             if thumbnail:
1001                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1002         if thumbnails:
1003             thumbnails.sort(key=lambda t: (
1004                 t.get('preference'), t.get('width'), t.get('height'),
1005                 t.get('id'), t.get('url')))
1006             for i, t in enumerate(thumbnails):
1007                 if 'width' in t and 'height' in t:
1008                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1009                 if t.get('id') is None:
1010                     t['id'] = '%d' % i
1011
1012         if thumbnails and 'thumbnail' not in info_dict:
1013             info_dict['thumbnail'] = thumbnails[-1]['url']
1014
1015         if 'display_id' not in info_dict and 'id' in info_dict:
1016             info_dict['display_id'] = info_dict['id']
1017
1018         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1019             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1020             # see http://bugs.python.org/issue1646728)
1021             try:
1022                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1023                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1024             except (ValueError, OverflowError, OSError):
1025                 pass
1026
1027         if self.params.get('listsubtitles', False):
1028             if 'automatic_captions' in info_dict:
1029                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1030             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1031             return
1032         info_dict['requested_subtitles'] = self.process_subtitles(
1033             info_dict['id'], info_dict.get('subtitles'),
1034             info_dict.get('automatic_captions'))
1035
1036         # We now pick which formats have to be downloaded
1037         if info_dict.get('formats') is None:
1038             # There's only one format available
1039             formats = [info_dict]
1040         else:
1041             formats = info_dict['formats']
1042
1043         if not formats:
1044             raise ExtractorError('No video formats found!')
1045
1046         formats_dict = {}
1047
1048         # We check that all the formats have the format and format_id fields
1049         for i, format in enumerate(formats):
1050             if 'url' not in format:
1051                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1052
1053             if format.get('format_id') is None:
1054                 format['format_id'] = compat_str(i)
1055             format_id = format['format_id']
1056             if format_id not in formats_dict:
1057                 formats_dict[format_id] = []
1058             formats_dict[format_id].append(format)
1059
1060         # Make sure all formats have unique format_id
1061         for format_id, ambiguous_formats in formats_dict.items():
1062             if len(ambiguous_formats) > 1:
1063                 for i, format in enumerate(ambiguous_formats):
1064                     format['format_id'] = '%s-%d' % (format_id, i)
1065
1066         for i, format in enumerate(formats):
1067             if format.get('format') is None:
1068                 format['format'] = '{id} - {res}{note}'.format(
1069                     id=format['format_id'],
1070                     res=self.format_resolution(format),
1071                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1072                 )
1073             # Automatically determine file extension if missing
1074             if 'ext' not in format:
1075                 format['ext'] = determine_ext(format['url']).lower()
1076             # Add HTTP headers, so that external programs can use them from the
1077             # json output
1078             full_format_info = info_dict.copy()
1079             full_format_info.update(format)
1080             format['http_headers'] = self._calc_headers(full_format_info)
1081
1082         # TODO Central sorting goes here
1083
1084         if formats[0] is not info_dict:
1085             # only set the 'formats' fields if the original info_dict list them
1086             # otherwise we end up with a circular reference, the first (and unique)
1087             # element in the 'formats' field in info_dict is info_dict itself,
1088             # wich can't be exported to json
1089             info_dict['formats'] = formats
1090         if self.params.get('listformats'):
1091             self.list_formats(info_dict)
1092             return
1093         if self.params.get('list_thumbnails'):
1094             self.list_thumbnails(info_dict)
1095             return
1096
1097         req_format = self.params.get('format')
1098         if req_format is None:
1099             req_format_list = []
1100             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1101                     info_dict['extractor'] in ['youtube', 'ted']):
1102                 merger = FFmpegMergerPP(self)
1103                 if merger.available and merger.can_merge():
1104                     req_format_list.append('bestvideo+bestaudio')
1105             req_format_list.append('best')
1106             req_format = '/'.join(req_format_list)
1107         formats_to_download = []
1108         if req_format == 'all':
1109             formats_to_download = formats
1110         else:
1111             for rfstr in req_format.split(','):
1112                 # We can accept formats requested in the format: 34/5/best, we pick
1113                 # the first that is available, starting from left
1114                 req_formats = rfstr.split('/')
1115                 for rf in req_formats:
1116                     if re.match(r'.+?\+.+?', rf) is not None:
1117                         # Two formats have been requested like '137+139'
1118                         format_1, format_2 = rf.split('+')
1119                         formats_info = (self.select_format(format_1, formats),
1120                                         self.select_format(format_2, formats))
1121                         if all(formats_info):
1122                             # The first format must contain the video and the
1123                             # second the audio
1124                             if formats_info[0].get('vcodec') == 'none':
1125                                 self.report_error('The first format must '
1126                                                   'contain the video, try using '
1127                                                   '"-f %s+%s"' % (format_2, format_1))
1128                                 return
1129                             output_ext = (
1130                                 formats_info[0]['ext']
1131                                 if self.params.get('merge_output_format') is None
1132                                 else self.params['merge_output_format'])
1133                             selected_format = {
1134                                 'requested_formats': formats_info,
1135                                 'format': '%s+%s' % (formats_info[0].get('format'),
1136                                                      formats_info[1].get('format')),
1137                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1138                                                         formats_info[1].get('format_id')),
1139                                 'width': formats_info[0].get('width'),
1140                                 'height': formats_info[0].get('height'),
1141                                 'resolution': formats_info[0].get('resolution'),
1142                                 'fps': formats_info[0].get('fps'),
1143                                 'vcodec': formats_info[0].get('vcodec'),
1144                                 'vbr': formats_info[0].get('vbr'),
1145                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1146                                 'acodec': formats_info[1].get('acodec'),
1147                                 'abr': formats_info[1].get('abr'),
1148                                 'ext': output_ext,
1149                             }
1150                         else:
1151                             selected_format = None
1152                     else:
1153                         selected_format = self.select_format(rf, formats)
1154                     if selected_format is not None:
1155                         formats_to_download.append(selected_format)
1156                         break
1157         if not formats_to_download:
1158             raise ExtractorError('requested format not available',
1159                                  expected=True)
1160
1161         if download:
1162             if len(formats_to_download) > 1:
1163                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1164             for format in formats_to_download:
1165                 new_info = dict(info_dict)
1166                 new_info.update(format)
1167                 self.process_info(new_info)
1168         # We update the info dict with the best quality format (backwards compatibility)
1169         info_dict.update(formats_to_download[-1])
1170         return info_dict
1171
1172     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1173         """Select the requested subtitles and their format"""
1174         available_subs = {}
1175         if normal_subtitles and self.params.get('writesubtitles'):
1176             available_subs.update(normal_subtitles)
1177         if automatic_captions and self.params.get('writeautomaticsub'):
1178             for lang, cap_info in automatic_captions.items():
1179                 if lang not in available_subs:
1180                     available_subs[lang] = cap_info
1181
1182         if (not self.params.get('writesubtitles') and not
1183                 self.params.get('writeautomaticsub') or not
1184                 available_subs):
1185             return None
1186
1187         if self.params.get('allsubtitles', False):
1188             requested_langs = available_subs.keys()
1189         else:
1190             if self.params.get('subtitleslangs', False):
1191                 requested_langs = self.params.get('subtitleslangs')
1192             elif 'en' in available_subs:
1193                 requested_langs = ['en']
1194             else:
1195                 requested_langs = [list(available_subs.keys())[0]]
1196
1197         formats_query = self.params.get('subtitlesformat', 'best')
1198         formats_preference = formats_query.split('/') if formats_query else []
1199         subs = {}
1200         for lang in requested_langs:
1201             formats = available_subs.get(lang)
1202             if formats is None:
1203                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1204                 continue
1205             for ext in formats_preference:
1206                 if ext == 'best':
1207                     f = formats[-1]
1208                     break
1209                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1210                 if matches:
1211                     f = matches[-1]
1212                     break
1213             else:
1214                 f = formats[-1]
1215                 self.report_warning(
1216                     'No subtitle format found matching "%s" for language %s, '
1217                     'using %s' % (formats_query, lang, f['ext']))
1218             subs[lang] = f
1219         return subs
1220
1221     def process_info(self, info_dict):
1222         """Process a single resolved IE result."""
1223
1224         assert info_dict.get('_type', 'video') == 'video'
1225
1226         max_downloads = self.params.get('max_downloads')
1227         if max_downloads is not None:
1228             if self._num_downloads >= int(max_downloads):
1229                 raise MaxDownloadsReached()
1230
1231         info_dict['fulltitle'] = info_dict['title']
1232         if len(info_dict['title']) > 200:
1233             info_dict['title'] = info_dict['title'][:197] + '...'
1234
1235         if 'format' not in info_dict:
1236             info_dict['format'] = info_dict['ext']
1237
1238         reason = self._match_entry(info_dict, incomplete=False)
1239         if reason is not None:
1240             self.to_screen('[download] ' + reason)
1241             return
1242
1243         self._num_downloads += 1
1244
1245         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1246
1247         # Forced printings
1248         if self.params.get('forcetitle', False):
1249             self.to_stdout(info_dict['fulltitle'])
1250         if self.params.get('forceid', False):
1251             self.to_stdout(info_dict['id'])
1252         if self.params.get('forceurl', False):
1253             if info_dict.get('requested_formats') is not None:
1254                 for f in info_dict['requested_formats']:
1255                     self.to_stdout(f['url'] + f.get('play_path', ''))
1256             else:
1257                 # For RTMP URLs, also include the playpath
1258                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1259         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1260             self.to_stdout(info_dict['thumbnail'])
1261         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1262             self.to_stdout(info_dict['description'])
1263         if self.params.get('forcefilename', False) and filename is not None:
1264             self.to_stdout(filename)
1265         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1266             self.to_stdout(formatSeconds(info_dict['duration']))
1267         if self.params.get('forceformat', False):
1268             self.to_stdout(info_dict['format'])
1269         if self.params.get('forcejson', False):
1270             self.to_stdout(json.dumps(info_dict))
1271
1272         # Do nothing else if in simulate mode
1273         if self.params.get('simulate', False):
1274             return
1275
1276         if filename is None:
1277             return
1278
1279         try:
1280             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1281             if dn and not os.path.exists(dn):
1282                 os.makedirs(dn)
1283         except (OSError, IOError) as err:
1284             self.report_error('unable to create directory ' + compat_str(err))
1285             return
1286
1287         if self.params.get('writedescription', False):
1288             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1289             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1290                 self.to_screen('[info] Video description is already present')
1291             elif info_dict.get('description') is None:
1292                 self.report_warning('There\'s no description to write.')
1293             else:
1294                 try:
1295                     self.to_screen('[info] Writing video description to: ' + descfn)
1296                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1297                         descfile.write(info_dict['description'])
1298                 except (OSError, IOError):
1299                     self.report_error('Cannot write description file ' + descfn)
1300                     return
1301
1302         if self.params.get('writeannotations', False):
1303             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1304             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1305                 self.to_screen('[info] Video annotations are already present')
1306             else:
1307                 try:
1308                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1309                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1310                         annofile.write(info_dict['annotations'])
1311                 except (KeyError, TypeError):
1312                     self.report_warning('There are no annotations to write.')
1313                 except (OSError, IOError):
1314                     self.report_error('Cannot write annotations file: ' + annofn)
1315                     return
1316
1317         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1318                                        self.params.get('writeautomaticsub')])
1319
1320         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1321             # subtitles download errors are already managed as troubles in relevant IE
1322             # that way it will silently go on when used with unsupporting IE
1323             subtitles = info_dict['requested_subtitles']
1324             ie = self.get_info_extractor(info_dict['extractor_key'])
1325             for sub_lang, sub_info in subtitles.items():
1326                 sub_format = sub_info['ext']
1327                 if sub_info.get('data') is not None:
1328                     sub_data = sub_info['data']
1329                 else:
1330                     try:
1331                         sub_data = ie._download_webpage(
1332                             sub_info['url'], info_dict['id'], note=False)
1333                     except ExtractorError as err:
1334                         self.report_warning('Unable to download subtitle for "%s": %s' %
1335                                             (sub_lang, compat_str(err.cause)))
1336                         continue
1337                 try:
1338                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1339                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1340                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1341                     else:
1342                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1343                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1344                             subfile.write(sub_data)
1345                 except (OSError, IOError):
1346                     self.report_error('Cannot write subtitles file ' + sub_filename)
1347                     return
1348
1349         if self.params.get('writeinfojson', False):
1350             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1351             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1352                 self.to_screen('[info] Video description metadata is already present')
1353             else:
1354                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1355                 try:
1356                     write_json_file(self.filter_requested_info(info_dict), infofn)
1357                 except (OSError, IOError):
1358                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1359                     return
1360
1361         self._write_thumbnails(info_dict, filename)
1362
1363         if not self.params.get('skip_download', False):
1364             try:
1365                 def dl(name, info):
1366                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1367                     for ph in self._progress_hooks:
1368                         fd.add_progress_hook(ph)
1369                     if self.params.get('verbose'):
1370                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1371                     return fd.download(name, info)
1372
1373                 if info_dict.get('requested_formats') is not None:
1374                     downloaded = []
1375                     success = True
1376                     merger = FFmpegMergerPP(self)
1377                     if not merger.available:
1378                         postprocessors = []
1379                         self.report_warning('You have requested multiple '
1380                                             'formats but ffmpeg or avconv are not installed.'
1381                                             ' The formats won\'t be merged.')
1382                     else:
1383                         postprocessors = [merger]
1384
1385                     def compatible_formats(formats):
1386                         video, audio = formats
1387                         # Check extension
1388                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1389                         if video_ext and audio_ext:
1390                             COMPATIBLE_EXTS = (
1391                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1392                                 ('webm')
1393                             )
1394                             for exts in COMPATIBLE_EXTS:
1395                                 if video_ext in exts and audio_ext in exts:
1396                                     return True
1397                         # TODO: Check acodec/vcodec
1398                         return False
1399
1400                     filename_real_ext = os.path.splitext(filename)[1][1:]
1401                     filename_wo_ext = (
1402                         os.path.splitext(filename)[0]
1403                         if filename_real_ext == info_dict['ext']
1404                         else filename)
1405                     requested_formats = info_dict['requested_formats']
1406                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1407                         info_dict['ext'] = 'mkv'
1408                         self.report_warning(
1409                             'Requested formats are incompatible for merge and will be merged into mkv.')
1410                     # Ensure filename always has a correct extension for successful merge
1411                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1412                     if os.path.exists(encodeFilename(filename)):
1413                         self.to_screen(
1414                             '[download] %s has already been downloaded and '
1415                             'merged' % filename)
1416                     else:
1417                         for f in requested_formats:
1418                             new_info = dict(info_dict)
1419                             new_info.update(f)
1420                             fname = self.prepare_filename(new_info)
1421                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1422                             downloaded.append(fname)
1423                             partial_success = dl(fname, new_info)
1424                             success = success and partial_success
1425                         info_dict['__postprocessors'] = postprocessors
1426                         info_dict['__files_to_merge'] = downloaded
1427                 else:
1428                     # Just a single file
1429                     success = dl(filename, info_dict)
1430             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1431                 self.report_error('unable to download video data: %s' % str(err))
1432                 return
1433             except (OSError, IOError) as err:
1434                 raise UnavailableVideoError(err)
1435             except (ContentTooShortError, ) as err:
1436                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1437                 return
1438
1439             if success:
1440                 # Fixup content
1441                 fixup_policy = self.params.get('fixup')
1442                 if fixup_policy is None:
1443                     fixup_policy = 'detect_or_warn'
1444
1445                 stretched_ratio = info_dict.get('stretched_ratio')
1446                 if stretched_ratio is not None and stretched_ratio != 1:
1447                     if fixup_policy == 'warn':
1448                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1449                             info_dict['id'], stretched_ratio))
1450                     elif fixup_policy == 'detect_or_warn':
1451                         stretched_pp = FFmpegFixupStretchedPP(self)
1452                         if stretched_pp.available:
1453                             info_dict.setdefault('__postprocessors', [])
1454                             info_dict['__postprocessors'].append(stretched_pp)
1455                         else:
1456                             self.report_warning(
1457                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1458                                     info_dict['id'], stretched_ratio))
1459                     else:
1460                         assert fixup_policy in ('ignore', 'never')
1461
1462                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1463                     if fixup_policy == 'warn':
1464                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1465                             info_dict['id']))
1466                     elif fixup_policy == 'detect_or_warn':
1467                         fixup_pp = FFmpegFixupM4aPP(self)
1468                         if fixup_pp.available:
1469                             info_dict.setdefault('__postprocessors', [])
1470                             info_dict['__postprocessors'].append(fixup_pp)
1471                         else:
1472                             self.report_warning(
1473                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1474                                     info_dict['id']))
1475                     else:
1476                         assert fixup_policy in ('ignore', 'never')
1477
1478                 try:
1479                     self.post_process(filename, info_dict)
1480                 except (PostProcessingError) as err:
1481                     self.report_error('postprocessing: %s' % str(err))
1482                     return
1483                 self.record_download_archive(info_dict)
1484
1485     def download(self, url_list):
1486         """Download a given list of URLs."""
1487         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1488         if (len(url_list) > 1 and
1489                 '%' not in outtmpl and
1490                 self.params.get('max_downloads') != 1):
1491             raise SameFileError(outtmpl)
1492
1493         for url in url_list:
1494             try:
1495                 # It also downloads the videos
1496                 res = self.extract_info(url)
1497             except UnavailableVideoError:
1498                 self.report_error('unable to download video')
1499             except MaxDownloadsReached:
1500                 self.to_screen('[info] Maximum number of downloaded files reached.')
1501                 raise
1502             else:
1503                 if self.params.get('dump_single_json', False):
1504                     self.to_stdout(json.dumps(res))
1505
1506         return self._download_retcode
1507
1508     def download_with_info_file(self, info_filename):
1509         with contextlib.closing(fileinput.FileInput(
1510                 [info_filename], mode='r',
1511                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1512             # FileInput doesn't have a read method, we can't call json.load
1513             info = self.filter_requested_info(json.loads('\n'.join(f)))
1514         try:
1515             self.process_ie_result(info, download=True)
1516         except DownloadError:
1517             webpage_url = info.get('webpage_url')
1518             if webpage_url is not None:
1519                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1520                 return self.download([webpage_url])
1521             else:
1522                 raise
1523         return self._download_retcode
1524
1525     @staticmethod
1526     def filter_requested_info(info_dict):
1527         return dict(
1528             (k, v) for k, v in info_dict.items()
1529             if k not in ['requested_formats', 'requested_subtitles'])
1530
1531     def post_process(self, filename, ie_info):
1532         """Run all the postprocessors on the given file."""
1533         info = dict(ie_info)
1534         info['filepath'] = filename
1535         pps_chain = []
1536         if ie_info.get('__postprocessors') is not None:
1537             pps_chain.extend(ie_info['__postprocessors'])
1538         pps_chain.extend(self._pps)
1539         for pp in pps_chain:
1540             files_to_delete = []
1541             try:
1542                 files_to_delete, info = pp.run(info)
1543             except PostProcessingError as e:
1544                 self.report_error(e.msg)
1545             if files_to_delete and not self.params.get('keepvideo', False):
1546                 for old_filename in files_to_delete:
1547                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1548                     try:
1549                         os.remove(encodeFilename(old_filename))
1550                     except (IOError, OSError):
1551                         self.report_warning('Unable to remove downloaded original file')
1552
1553     def _make_archive_id(self, info_dict):
1554         # Future-proof against any change in case
1555         # and backwards compatibility with prior versions
1556         extractor = info_dict.get('extractor_key')
1557         if extractor is None:
1558             if 'id' in info_dict:
1559                 extractor = info_dict.get('ie_key')  # key in a playlist
1560         if extractor is None:
1561             return None  # Incomplete video information
1562         return extractor.lower() + ' ' + info_dict['id']
1563
1564     def in_download_archive(self, info_dict):
1565         fn = self.params.get('download_archive')
1566         if fn is None:
1567             return False
1568
1569         vid_id = self._make_archive_id(info_dict)
1570         if vid_id is None:
1571             return False  # Incomplete video information
1572
1573         try:
1574             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1575                 for line in archive_file:
1576                     if line.strip() == vid_id:
1577                         return True
1578         except IOError as ioe:
1579             if ioe.errno != errno.ENOENT:
1580                 raise
1581         return False
1582
1583     def record_download_archive(self, info_dict):
1584         fn = self.params.get('download_archive')
1585         if fn is None:
1586             return
1587         vid_id = self._make_archive_id(info_dict)
1588         assert vid_id
1589         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1590             archive_file.write(vid_id + '\n')
1591
1592     @staticmethod
1593     def format_resolution(format, default='unknown'):
1594         if format.get('vcodec') == 'none':
1595             return 'audio only'
1596         if format.get('resolution') is not None:
1597             return format['resolution']
1598         if format.get('height') is not None:
1599             if format.get('width') is not None:
1600                 res = '%sx%s' % (format['width'], format['height'])
1601             else:
1602                 res = '%sp' % format['height']
1603         elif format.get('width') is not None:
1604             res = '?x%d' % format['width']
1605         else:
1606             res = default
1607         return res
1608
1609     def _format_note(self, fdict):
1610         res = ''
1611         if fdict.get('ext') in ['f4f', 'f4m']:
1612             res += '(unsupported) '
1613         if fdict.get('format_note') is not None:
1614             res += fdict['format_note'] + ' '
1615         if fdict.get('tbr') is not None:
1616             res += '%4dk ' % fdict['tbr']
1617         if fdict.get('container') is not None:
1618             if res:
1619                 res += ', '
1620             res += '%s container' % fdict['container']
1621         if (fdict.get('vcodec') is not None and
1622                 fdict.get('vcodec') != 'none'):
1623             if res:
1624                 res += ', '
1625             res += fdict['vcodec']
1626             if fdict.get('vbr') is not None:
1627                 res += '@'
1628         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1629             res += 'video@'
1630         if fdict.get('vbr') is not None:
1631             res += '%4dk' % fdict['vbr']
1632         if fdict.get('fps') is not None:
1633             res += ', %sfps' % fdict['fps']
1634         if fdict.get('acodec') is not None:
1635             if res:
1636                 res += ', '
1637             if fdict['acodec'] == 'none':
1638                 res += 'video only'
1639             else:
1640                 res += '%-5s' % fdict['acodec']
1641         elif fdict.get('abr') is not None:
1642             if res:
1643                 res += ', '
1644             res += 'audio'
1645         if fdict.get('abr') is not None:
1646             res += '@%3dk' % fdict['abr']
1647         if fdict.get('asr') is not None:
1648             res += ' (%5dHz)' % fdict['asr']
1649         if fdict.get('filesize') is not None:
1650             if res:
1651                 res += ', '
1652             res += format_bytes(fdict['filesize'])
1653         elif fdict.get('filesize_approx') is not None:
1654             if res:
1655                 res += ', '
1656             res += '~' + format_bytes(fdict['filesize_approx'])
1657         return res
1658
1659     def list_formats(self, info_dict):
1660         formats = info_dict.get('formats', [info_dict])
1661         table = [
1662             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1663             for f in formats
1664             if f.get('preference') is None or f['preference'] >= -1000]
1665         if len(formats) > 1:
1666             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1667
1668         header_line = ['format code', 'extension', 'resolution', 'note']
1669         self.to_screen(
1670             '[info] Available formats for %s:\n%s' %
1671             (info_dict['id'], render_table(header_line, table)))
1672
1673     def list_thumbnails(self, info_dict):
1674         thumbnails = info_dict.get('thumbnails')
1675         if not thumbnails:
1676             tn_url = info_dict.get('thumbnail')
1677             if tn_url:
1678                 thumbnails = [{'id': '0', 'url': tn_url}]
1679             else:
1680                 self.to_screen(
1681                     '[info] No thumbnails present for %s' % info_dict['id'])
1682                 return
1683
1684         self.to_screen(
1685             '[info] Thumbnails for %s:' % info_dict['id'])
1686         self.to_screen(render_table(
1687             ['ID', 'width', 'height', 'URL'],
1688             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1689
1690     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1691         if not subtitles:
1692             self.to_screen('%s has no %s' % (video_id, name))
1693             return
1694         self.to_screen(
1695             'Available %s for %s:' % (name, video_id))
1696         self.to_screen(render_table(
1697             ['Language', 'formats'],
1698             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1699                 for lang, formats in subtitles.items()]))
1700
1701     def urlopen(self, req):
1702         """ Start an HTTP download """
1703
1704         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1705         # always respected by websites, some tend to give out URLs with non percent-encoded
1706         # non-ASCII characters (see telemb.py, ard.py [#3412])
1707         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1708         # To work around aforementioned issue we will replace request's original URL with
1709         # percent-encoded one
1710         req_is_string = isinstance(req, compat_basestring)
1711         url = req if req_is_string else req.get_full_url()
1712         url_escaped = escape_url(url)
1713
1714         # Substitute URL if any change after escaping
1715         if url != url_escaped:
1716             if req_is_string:
1717                 req = url_escaped
1718             else:
1719                 req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
1720                 req = req_type(
1721                     url_escaped, data=req.data, headers=req.headers,
1722                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1723
1724         return self._opener.open(req, timeout=self._socket_timeout)
1725
    def print_debug_header(self):
        """Write diagnostic '[debug]' lines describing the environment.

        Does nothing unless the 'verbose' param is set.  Otherwise reports,
        in order: encodings, youtube-dl version, git HEAD (if obtainable),
        Python version and platform, external program versions, and the
        proxy map.  With the 'call_home' param set it additionally contacts
        yt-dl.org to report the public IP address and to warn when a newer
        version is available.
        """
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may lack an 'encoding' attribute; report its type instead
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        # Best effort: ask git for the current commit, running it in the
        # directory containing this file.  Any failure is silently ignored.
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            # Only print output that starts with hexadecimal characters
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            # sys.exc_clear() does not exist on Python 3, hence the nested guard
            try:
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # List external programs in sorted order, skipping those with a
        # falsy version value
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Merge the 'proxies' mapping of every opener handler that has one
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # These requests go over the network via self.urlopen
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1790
1791     def _setup_opener(self):
1792         timeout_val = self.params.get('socket_timeout')
1793         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1794
1795         opts_cookiefile = self.params.get('cookiefile')
1796         opts_proxy = self.params.get('proxy')
1797
1798         if opts_cookiefile is None:
1799             self.cookiejar = compat_cookiejar.CookieJar()
1800         else:
1801             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1802                 opts_cookiefile)
1803             if os.access(opts_cookiefile, os.R_OK):
1804                 self.cookiejar.load()
1805
1806         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1807             self.cookiejar)
1808         if opts_proxy is not None:
1809             if opts_proxy == '':
1810                 proxies = {}
1811             else:
1812                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1813         else:
1814             proxies = compat_urllib_request.getproxies()
1815             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1816             if 'http' in proxies and 'https' not in proxies:
1817                 proxies['https'] = proxies['http']
1818         proxy_handler = PerRequestProxyHandler(proxies)
1819
1820         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1821         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1822         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1823         opener = compat_urllib_request.build_opener(
1824             proxy_handler, https_handler, cookie_processor, ydlh)
1825
1826         # Delete the default user-agent header, which would otherwise apply in
1827         # cases where our custom HTTP handler doesn't come into play
1828         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1829         opener.addheaders = []
1830         self._opener = opener
1831
1832     def encode(self, s):
1833         if isinstance(s, bytes):
1834             return s  # Already encoded
1835
1836         try:
1837             return s.encode(self.get_encoding())
1838         except UnicodeEncodeError as err:
1839             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1840             raise
1841
1842     def get_encoding(self):
1843         encoding = self.params.get('encoding')
1844         if encoding is None:
1845             encoding = preferredencoding()
1846         return encoding
1847
1848     def _write_thumbnails(self, info_dict, filename):
1849         if self.params.get('writethumbnail', False):
1850             thumbnails = info_dict.get('thumbnails')
1851             if thumbnails:
1852                 thumbnails = [thumbnails[-1]]
1853         elif self.params.get('write_all_thumbnails', False):
1854             thumbnails = info_dict.get('thumbnails')
1855         else:
1856             return
1857
1858         if not thumbnails:
1859             # No thumbnails present, so return immediately
1860             return
1861
1862         for t in thumbnails:
1863             thumb_ext = determine_ext(t['url'], 'jpg')
1864             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1865             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1866             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1867
1868             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1869                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1870                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1871             else:
1872                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1873                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1874                 try:
1875                     uf = self.urlopen(t['url'])
1876                     with open(thumb_filename, 'wb') as thumbf:
1877                         shutil.copyfileobj(uf, thumbf)
1878                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1879                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1880                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1881                     self.report_warning('Unable to download thumbnail "%s": %s' %
1882                                         (t['url'], compat_str(err)))