1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30
31 from .compat import (
32 compat_basestring,
33 compat_cookiejar,
34 compat_get_terminal_size,
35 compat_http_client,
36 compat_kwargs,
37 compat_numeric_types,
38 compat_os_name,
39 compat_str,
40 compat_tokenize_tokenize,
41 compat_urllib_error,
42 compat_urllib_request,
43 compat_urllib_request_DataHandler,
44 )
45 from .utils import (
46 age_restricted,
47 args_to_str,
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
51 DEFAULT_OUTTMPL,
52 determine_ext,
53 determine_protocol,
54 DownloadError,
55 encode_compat_str,
56 encodeFilename,
57 error_to_compat_str,
58 expand_path,
59 ExtractorError,
60 format_bytes,
61 formatSeconds,
62 GeoRestrictedError,
63 int_or_none,
64 ISO3166Utils,
65 locked_file,
66 make_HTTPS_handler,
67 MaxDownloadsReached,
68 orderedSet,
69 PagedList,
70 parse_filesize,
71 PerRequestProxyHandler,
72 platform_name,
73 PostProcessingError,
74 preferredencoding,
75 prepend_extension,
76 register_socks_protocols,
77 render_table,
78 replace_extension,
79 SameFileError,
80 sanitize_filename,
81 sanitize_path,
82 sanitize_url,
83 sanitized_Request,
84 std_headers,
85 str_or_none,
86 subtitles_filename,
87 UnavailableVideoError,
88 url_basename,
89 version_tuple,
90 write_json_file,
91 write_string,
92 YoutubeDLCookieJar,
93 YoutubeDLCookieProcessor,
94 YoutubeDLHandler,
95 YoutubeDLRedirectHandler,
96 )
97 from .cache import Cache
98 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
99 from .extractor.openload import PhantomJSwrapper
100 from .downloader import get_suitable_downloader
101 from .downloader.rtmp import rtmpdump_version
102 from .postprocessor import (
103 FFmpegFixupM3u8PP,
104 FFmpegFixupM4aPP,
105 FFmpegFixupStretchedPP,
106 FFmpegMergerPP,
107 FFmpegPostProcessor,
108 FFmpegSubtitlesConvertorPP,
109 get_postprocessor,
110 )
111 from .version import __version__
112
113 if compat_os_name == 'nt':
114 import ctypes
115
116
117 class YoutubeDL(object):
118 """YoutubeDL class.
119
120     YoutubeDL objects are responsible for downloading the
121 actual video file and writing it to disk if the user has requested
122 it, among some other tasks. In most cases there should be one per
123     program. Since, given a video URL, the downloader doesn't know how to
124     extract all the needed information (a task that InfoExtractors do), it
125     has to pass the URL to one of them.
126
127 For this, YoutubeDL objects have a method that allows
128 InfoExtractors to be registered in a given order. When it is passed
129     a URL, the YoutubeDL object hands it to the first InfoExtractor it
130 finds that reports being able to handle it. The InfoExtractor extracts
131 all the information about the video or videos the URL refers to, and
132     YoutubeDL processes the extracted information, possibly using a File
133 Downloader to download the video.
134
135 YoutubeDL objects accept a lot of parameters. In order not to saturate
136 the object constructor with arguments, it receives a dictionary of
137 options instead. These options are available through the params
138 attribute for the InfoExtractors to use. The YoutubeDL also
139     registers itself as the downloader in charge of the InfoExtractors
140 that are added to it, so this is a "mutual registration".
141
142 Available options:
143
144 username: Username for authentication purposes.
145 password: Password for authentication purposes.
146 videopassword: Password for accessing a video.
147 ap_mso: Adobe Pass multiple-system operator identifier.
148 ap_username: Multiple-system operator account username.
149 ap_password: Multiple-system operator account password.
150 usenetrc: Use netrc for authentication instead.
151 verbose: Print additional info to stdout.
152 quiet: Do not print messages to stdout.
153 no_warnings: Do not print out anything for warnings.
154 forceurl: Force printing final URL.
155 forcetitle: Force printing title.
156 forceid: Force printing ID.
157 forcethumbnail: Force printing thumbnail URL.
158 forcedescription: Force printing description.
159 forcefilename: Force printing final filename.
160 forceduration: Force printing duration.
161 forcejson: Force printing info_dict as JSON.
162 dump_single_json: Force printing the info_dict of the whole playlist
163 (or video) as a single JSON line.
164 simulate: Do not download the video files.
165 format: Video format code. See options.py for more information.
166 outtmpl: Template for output names.
167 restrictfilenames: Do not allow "&" and spaces in file names.
168 trim_file_name: Limit length of filename (extension excluded).
169 ignoreerrors: Do not stop on download errors.
170 force_generic_extractor: Force downloader to use the generic extractor
171 nooverwrites: Prevent overwriting files.
172 playliststart: Playlist item to start at.
173 playlistend: Playlist item to end at.
174 playlist_items: Specific indices of playlist to download.
175 playlistreverse: Download playlist items in reverse order.
176 playlistrandom: Download playlist items in random order.
177 matchtitle: Download only matching titles.
178 rejecttitle: Reject downloads for matching titles.
179 logger: Log messages to a logging.Logger instance.
180 logtostderr: Log messages to stderr instead of stdout.
181 writedescription: Write the video description to a .description file
182 writeinfojson: Write the video description to a .info.json file
183 writeannotations: Write the video annotations to a .annotations.xml file
184 writethumbnail: Write the thumbnail image to a file
185 write_all_thumbnails: Write all thumbnail formats to files
186 writesubtitles: Write the video subtitles to a file
187 writeautomaticsub: Write the automatically generated subtitles to a file
188 allsubtitles: Downloads all the subtitles of the video
189 (requires writesubtitles or writeautomaticsub)
190 listsubtitles: Lists all available subtitles for the video
191 subtitlesformat: The format code for subtitles
192 subtitleslangs: List of languages of the subtitles to download
193 keepvideo: Keep the video file after post-processing
194 daterange: A DateRange object, download only if the upload_date is in the range.
195 skip_download: Skip the actual download of the video file
196 cachedir: Location of the cache files in the filesystem.
197 False to disable filesystem cache.
198 noplaylist: Download single video instead of a playlist if in doubt.
199 age_limit: An integer representing the user's age in years.
200 Unsuitable videos for the given age are skipped.
201 min_views: An integer representing the minimum view count the video
202 must have in order to not be skipped.
203 Videos without view count information are always
204 downloaded. None for no limit.
205 max_views: An integer representing the maximum view count.
206 Videos that are more popular than that are not
207 downloaded.
208 Videos without view count information are always
209 downloaded. None for no limit.
210 download_archive: File name of a file where all downloads are recorded.
211 Videos already present in the file are not downloaded
212 again.
213 break_on_existing: Stop the download process after attempting to download a file that's
214 in the archive.
215 cookiefile: File name where cookies should be read from and dumped to.
216 nocheckcertificate:Do not verify SSL certificates
217 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
218 At the moment, this is only supported by YouTube.
219 proxy: URL of the proxy server to use
220 geo_verification_proxy: URL of the proxy to use for IP address verification
221 on geo-restricted sites.
222 socket_timeout: Time to wait for unresponsive hosts, in seconds
223 bidi_workaround: Work around buggy terminals without bidirectional text
224                        support, using fribidi
225 debug_printtraffic:Print out sent and received HTTP traffic
226 include_ads: Download ads as well
227 default_search: Prepend this string if an input url is not valid.
228 'auto' for elaborate guessing
229 encoding: Use this encoding instead of the system-specified.
230 extract_flat: Do not resolve URLs, return the immediate result.
231 Pass in 'in_playlist' to only show this behavior for
232 playlist items.
233 postprocessors: A list of dictionaries, each with an entry
234 * key: The name of the postprocessor. See
235 youtube_dlc/postprocessor/__init__.py for a list.
236 as well as any further keyword arguments for the
237 postprocessor.
238 progress_hooks: A list of functions that get called on download
239 progress, with a dictionary with the entries
240 * status: One of "downloading", "error", or "finished".
241 Check this first and ignore unknown values.
242
243 If status is one of "downloading", or "finished", the
244 following properties may also be present:
245 * filename: The final filename (always present)
246 * tmpfilename: The filename we're currently writing to
247 * downloaded_bytes: Bytes on disk
248 * total_bytes: Size of the whole file, None if unknown
249 * total_bytes_estimate: Guess of the eventual file size,
250 None if unavailable.
251 * elapsed: The number of seconds since download started.
252 * eta: The estimated time in seconds, None if unknown
253 * speed: The download speed in bytes/second, None if
254 unknown
255 * fragment_index: The counter of the currently
256 downloaded video fragment.
257 * fragment_count: The number of fragments (= individual
258 files that will be merged)
259
260 Progress hooks are guaranteed to be called at least once
261 (with status "finished") if the download is successful.
262 merge_output_format: Extension to use when merging formats.
263 fixup: Automatically correct known faults of the file.
264 One of:
265 - "never": do nothing
266 - "warn": only emit a warning
267 - "detect_or_warn": check whether we can do anything
268 about it, warn otherwise (default)
269 source_address: Client-side IP address to bind to.
270 call_home: Boolean, true iff we are allowed to contact the
271 youtube-dlc servers for debugging.
272 sleep_interval: Number of seconds to sleep before each download when
273 used alone or a lower bound of a range for randomized
274 sleep before each download (minimum possible number
275 of seconds to sleep) when used along with
276 max_sleep_interval.
277 max_sleep_interval:Upper bound of a range for randomized sleep before each
278 download (maximum possible number of seconds to sleep).
279 Must only be used along with sleep_interval.
280 Actual sleep time will be a random float from range
281 [sleep_interval; max_sleep_interval].
282 listformats: Print an overview of available video formats and exit.
283 list_thumbnails: Print a table of all thumbnails and exit.
284 match_filter: A function that gets called with the info_dict of
285 every video.
286 If it returns a message, the video is ignored.
287 If it returns None, the video is downloaded.
288 match_filter_func in utils.py is one example for this.
289 no_color: Do not emit color codes in output.
290 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
291 HTTP header
292 geo_bypass_country:
293                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
294 explicit geographic restriction bypassing via faking
295 X-Forwarded-For HTTP header
296 geo_bypass_ip_block:
297 IP range in CIDR notation that will be used similarly to
298 geo_bypass_country
299
300 The following options determine which downloader is picked:
301 external_downloader: Executable of the external downloader to call.
302 None or unset for standard (built-in) downloader.
303     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
304                        if True, use ffmpeg/avconv if False, or use the
305                        downloader suggested by the extractor if None.
306
307 The following parameters are not used by YoutubeDL itself, they are used by
308 the downloader (see youtube_dlc/downloader/common.py):
309 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
310 noresizebuffer, retries, continuedl, noprogress, consoletitle,
311 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
312 http_chunk_size.
313
314 The following options are used by the post processors:
315 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
316 otherwise prefer ffmpeg.
317 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
318 to the binary or its containing directory.
319 postprocessor_args: A list of additional command-line arguments for the
320 postprocessor.
321
322 The following options are used by the Youtube extractor:
323 youtube_include_dash_manifest: If True (default), DASH manifests and related
324 data will be downloaded and processed by extractor.
325 You can reduce network I/O by disabling it if you don't
326 care about DASH.
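
    A minimal usage sketch (the option values below are illustrative, not
    defaults):

        from youtube_dlc import YoutubeDL

        ydl_opts = {
            'format': 'bestvideo+bestaudio/best',
            'outtmpl': '%(title)s-%(id)s.%(ext)s',
            'ignoreerrors': True,
        }
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])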
327 """
328
329 _NUMERIC_FIELDS = set((
330 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
331 'timestamp', 'upload_year', 'upload_month', 'upload_day',
332 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
333 'average_rating', 'comment_count', 'age_limit',
334 'start_time', 'end_time',
335 'chapter_number', 'season_number', 'episode_number',
336 'track_number', 'disc_number', 'release_year',
337 'playlist_index',
338 ))
339
340 params = None
341 _ies = []
342 _pps = []
343 _download_retcode = None
344 _num_downloads = None
345 _screen_file = None
346
347 def __init__(self, params=None, auto_init=True):
348 """Create a FileDownloader object with the given options."""
349 if params is None:
350 params = {}
351 self._ies = []
352 self._ies_instances = {}
353 self._pps = []
354 self._progress_hooks = []
355 self._download_retcode = 0
356 self._num_downloads = 0
357 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
358 self._err_file = sys.stderr
359 self.params = {
360 # Default parameters
361 'nocheckcertificate': False,
362 }
363 self.params.update(params)
364 self.cache = Cache(self)
365 self.archive = set()
366
367 """Preload the archive, if any is specified"""
368 def preload_download_archive(self):
369 fn = self.params.get('download_archive')
370 if fn is None:
371 return False
372 try:
373 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
374 for line in archive_file:
375 self.archive.add(line.strip())
376 except IOError as ioe:
377 if ioe.errno != errno.ENOENT:
378 raise
379 return False
380 return True
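            # Each archive entry is a single line of the form "<extractor> <video id>"
            # (e.g. "youtube BaW_jenozKc"), as written back by record_download_archive.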
381
382 def check_deprecated(param, option, suggestion):
383 if self.params.get(param) is not None:
384 self.report_warning(
385 '%s is deprecated. Use %s instead.' % (option, suggestion))
386 return True
387 return False
388
389         if self.params.get('verbose') and self.params.get('download_archive'):
390             self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
391
392 preload_download_archive(self)
393
394 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
395 if self.params.get('geo_verification_proxy') is None:
396 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
397
398         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N is the number of digits')
399 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
400 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
401
402 if params.get('bidi_workaround', False):
403 try:
404 import pty
405 master, slave = pty.openpty()
406 width = compat_get_terminal_size().columns
407 if width is None:
408 width_args = []
409 else:
410 width_args = ['-w', str(width)]
411 sp_kwargs = dict(
412 stdin=subprocess.PIPE,
413 stdout=slave,
414 stderr=self._err_file)
415 try:
416 self._output_process = subprocess.Popen(
417 ['bidiv'] + width_args, **sp_kwargs
418 )
419 except OSError:
420 self._output_process = subprocess.Popen(
421 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
422 self._output_channel = os.fdopen(master, 'rb')
423 except OSError as ose:
424 if ose.errno == errno.ENOENT:
425                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
426 else:
427 raise
428
429 if (sys.platform != 'win32'
430 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
431 and not params.get('restrictfilenames', False)):
432 # Unicode filesystem API will throw errors (#1474, #13027)
433 self.report_warning(
434 'Assuming --restrict-filenames since file system encoding '
435 'cannot encode all characters. '
436 'Set the LC_ALL environment variable to fix this.')
437 self.params['restrictfilenames'] = True
438
439 if isinstance(params.get('outtmpl'), bytes):
440 self.report_warning(
441 'Parameter outtmpl is bytes, but should be a unicode string. '
442 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
443
444 self._setup_opener()
445
446 if auto_init:
447 self.print_debug_header()
448 self.add_default_info_extractors()
449
450 for pp_def_raw in self.params.get('postprocessors', []):
451 pp_class = get_postprocessor(pp_def_raw['key'])
452 pp_def = dict(pp_def_raw)
453 del pp_def['key']
454 pp = pp_class(self, **compat_kwargs(pp_def))
455 self.add_post_processor(pp)
456
457 for ph in self.params.get('progress_hooks', []):
458 self.add_progress_hook(ph)
459
460 register_socks_protocols()
461
462 def warn_if_short_id(self, argv):
463 # short YouTube ID starting with dash?
464 idxs = [
465 i for i, a in enumerate(argv)
466 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
467 if idxs:
468 correct_argv = (
469 ['youtube-dlc']
470 + [a for i, a in enumerate(argv) if i not in idxs]
471 + ['--'] + [argv[i] for i in idxs]
472 )
473 self.report_warning(
474 'Long argument string detected. '
475 'Use -- to separate parameters and URLs, like this:\n%s\n' %
476 args_to_str(correct_argv))
477
478 def add_info_extractor(self, ie):
479 """Add an InfoExtractor object to the end of the list."""
480 self._ies.append(ie)
481 if not isinstance(ie, type):
482 self._ies_instances[ie.ie_key()] = ie
483 ie.set_downloader(self)
484
485 def get_info_extractor(self, ie_key):
486 """
487         Get an instance of an IE with name ie_key. It will try to get one from
488         the _ies list; if there's no instance, it will create a new one and add
489         it to the extractor list.
490 """
491 ie = self._ies_instances.get(ie_key)
492 if ie is None:
493 ie = get_info_extractor(ie_key)()
494 self.add_info_extractor(ie)
495 return ie
496
497 def add_default_info_extractors(self):
498 """
499         Add the InfoExtractors returned by gen_extractor_classes to the end of the list
500 """
501 for ie in gen_extractor_classes():
502 self.add_info_extractor(ie)
503
504 def add_post_processor(self, pp):
505 """Add a PostProcessor object to the end of the chain."""
506 self._pps.append(pp)
507 pp.set_downloader(self)
508
509 def add_progress_hook(self, ph):
510 """Add the progress hook (currently only for the file downloader)"""
511 self._progress_hooks.append(ph)
512
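    # A sketch of a progress hook; hooks receive the status dictionary
    # described in the class docstring ('status', 'filename', ...):
    #
    #     def my_hook(d):
    #         if d['status'] == 'finished':
    #             print('Done downloading %s' % d['filename'])
    #
    #     ydl.add_progress_hook(my_hook)
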
513 def _bidi_workaround(self, message):
514 if not hasattr(self, '_output_channel'):
515 return message
516
517 assert hasattr(self, '_output_process')
518 assert isinstance(message, compat_str)
519 line_count = message.count('\n') + 1
520 self._output_process.stdin.write((message + '\n').encode('utf-8'))
521 self._output_process.stdin.flush()
522 res = ''.join(self._output_channel.readline().decode('utf-8')
523 for _ in range(line_count))
524 return res[:-len('\n')]
525
526 def to_screen(self, message, skip_eol=False):
527 """Print message to stdout if not in quiet mode."""
528 return self.to_stdout(message, skip_eol, check_quiet=True)
529
530 def _write_string(self, s, out=None):
531 write_string(s, out=out, encoding=self.params.get('encoding'))
532
533 def to_stdout(self, message, skip_eol=False, check_quiet=False):
534 """Print message to stdout if not in quiet mode."""
535 if self.params.get('logger'):
536 self.params['logger'].debug(message)
537 elif not check_quiet or not self.params.get('quiet', False):
538 message = self._bidi_workaround(message)
539 terminator = ['\n', ''][skip_eol]
540 output = message + terminator
541
542 self._write_string(output, self._screen_file)
543
544 def to_stderr(self, message):
545 """Print message to stderr."""
546 assert isinstance(message, compat_str)
547 if self.params.get('logger'):
548 self.params['logger'].error(message)
549 else:
550 message = self._bidi_workaround(message)
551 output = message + '\n'
552 self._write_string(output, self._err_file)
553
554 def to_console_title(self, message):
555 if not self.params.get('consoletitle', False):
556 return
557 if compat_os_name == 'nt':
558 if ctypes.windll.kernel32.GetConsoleWindow():
559 # c_wchar_p() might not be necessary if `message` is
560 # already of type unicode()
561 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
562 elif 'TERM' in os.environ:
563 self._write_string('\033]0;%s\007' % message, self._screen_file)
564
565 def save_console_title(self):
566 if not self.params.get('consoletitle', False):
567 return
568 if self.params.get('simulate', False):
569 return
570 if compat_os_name != 'nt' and 'TERM' in os.environ:
571 # Save the title on stack
572 self._write_string('\033[22;0t', self._screen_file)
573
574 def restore_console_title(self):
575 if not self.params.get('consoletitle', False):
576 return
577 if self.params.get('simulate', False):
578 return
579 if compat_os_name != 'nt' and 'TERM' in os.environ:
580 # Restore the title from stack
581 self._write_string('\033[23;0t', self._screen_file)
582
583 def __enter__(self):
584 self.save_console_title()
585 return self
586
587 def __exit__(self, *args):
588 self.restore_console_title()
589
590 if self.params.get('cookiefile') is not None:
591 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
592
593 def trouble(self, message=None, tb=None):
594 """Determine action to take when a download problem appears.
595
596         Depending on whether the downloader has been configured to ignore
597         download errors, this method may raise an exception (after printing
598         the message) or simply set the return code and continue.
599
600 tb, if given, is additional traceback information.
601 """
602 if message is not None:
603 self.to_stderr(message)
604 if self.params.get('verbose'):
605 if tb is None:
606 if sys.exc_info()[0]: # if .trouble has been called from an except block
607 tb = ''
608 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
609 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
610 tb += encode_compat_str(traceback.format_exc())
611 else:
612 tb_data = traceback.format_list(traceback.extract_stack())
613 tb = ''.join(tb_data)
614 self.to_stderr(tb)
615 if not self.params.get('ignoreerrors', False):
616 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
617 exc_info = sys.exc_info()[1].exc_info
618 else:
619 exc_info = sys.exc_info()
620 raise DownloadError(message, exc_info)
621 self._download_retcode = 1
622
623 def report_warning(self, message):
624 '''
625         Print the message to stderr; it will be prefixed with 'WARNING:'.
626         If stderr is a tty file, the 'WARNING:' will be colored.
627 '''
628 if self.params.get('logger') is not None:
629 self.params['logger'].warning(message)
630 else:
631 if self.params.get('no_warnings'):
632 return
633 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
634 _msg_header = '\033[0;33mWARNING:\033[0m'
635 else:
636 _msg_header = 'WARNING:'
637 warning_message = '%s %s' % (_msg_header, message)
638 self.to_stderr(warning_message)
639
640 def report_error(self, message, tb=None):
641 '''
642 Do the same as trouble, but prefixes the message with 'ERROR:', colored
643 in red if stderr is a tty file.
644 '''
645 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
646 _msg_header = '\033[0;31mERROR:\033[0m'
647 else:
648 _msg_header = 'ERROR:'
649 error_message = '%s %s' % (_msg_header, message)
650 self.trouble(error_message, tb)
651
652 def report_file_already_downloaded(self, file_name):
653 """Report file has already been fully downloaded."""
654 try:
655 self.to_screen('[download] %s has already been downloaded' % file_name)
656 except UnicodeEncodeError:
657 self.to_screen('[download] The file has already been downloaded')
658
659 def prepare_filename(self, info_dict):
660 """Generate the output filename."""
661 try:
662 template_dict = dict(info_dict)
663
664 template_dict['epoch'] = int(time.time())
665 autonumber_size = self.params.get('autonumber_size')
666 if autonumber_size is None:
667 autonumber_size = 5
668 template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
669 if template_dict.get('resolution') is None:
670 if template_dict.get('width') and template_dict.get('height'):
671 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
672 elif template_dict.get('height'):
673 template_dict['resolution'] = '%sp' % template_dict['height']
674 elif template_dict.get('width'):
675 template_dict['resolution'] = '%dx?' % template_dict['width']
676
677 sanitize = lambda k, v: sanitize_filename(
678 compat_str(v),
679 restricted=self.params.get('restrictfilenames'),
680 is_id=(k == 'id' or k.endswith('_id')))
681 template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
682 for k, v in template_dict.items()
683 if v is not None and not isinstance(v, (list, tuple, dict)))
684 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
685
686 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
687
688 # For fields playlist_index and autonumber convert all occurrences
689 # of %(field)s to %(field)0Nd for backward compatibility
690 field_size_compat_map = {
691 'playlist_index': len(str(template_dict['n_entries'])),
692 'autonumber': autonumber_size,
693 }
694 FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
695 mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
696 if mobj:
697 outtmpl = re.sub(
698 FIELD_SIZE_COMPAT_RE,
699 r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
700 outtmpl)
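            # e.g. '%(playlist_index)s' becomes '%(playlist_index)03d' when the
            # playlist has between 100 and 999 entries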
701
702             # Missing numeric fields used together with integer presentation types
703             # in the format specification will break the argument substitution, since
704             # the string 'NA' is returned for missing fields. We patch the output
705             # template so that missing fields use the string presentation type.
706 for numeric_field in self._NUMERIC_FIELDS:
707 if numeric_field not in template_dict:
708 # As of [1] format syntax is:
709 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
710 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
711 FORMAT_RE = r'''(?x)
712 (?<!%)
713 %
714 \({0}\) # mapping key
715 (?:[#0\-+ ]+)? # conversion flags (optional)
716 (?:\d+)? # minimum field width (optional)
717 (?:\.\d+)? # precision (optional)
718 [hlL]? # length modifier (optional)
719 [diouxXeEfFgGcrs%] # conversion type
720 '''
721 outtmpl = re.sub(
722 FORMAT_RE.format(numeric_field),
723 r'%({0})s'.format(numeric_field), outtmpl)
724
725             # expand_path translates '%%' into '%' and '$$' into '$',
726             # which is not what we want, since we need to keep
727             # '%%' intact for the template dict substitution step. Work around
728             # this with a boundary-like separator hack.
729 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
730 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
731
732 # outtmpl should be expand_path'ed before template dict substitution
733 # because meta fields may contain env variables we don't want to
734 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
735 # title "Hello $PATH", we don't want `$PATH` to be expanded.
736 filename = expand_path(outtmpl).replace(sep, '') % template_dict
737
738 # https://github.com/blackjack4494/youtube-dlc/issues/85
739 trim_file_name = self.params.get('trim_file_name', False)
740 if trim_file_name:
741 fn_groups = filename.rsplit('.')
742 ext = fn_groups[-1]
743 sub_ext = ''
744 if len(fn_groups) > 2:
745 sub_ext = fn_groups[-2]
746 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
747
748 # Temporary fix for #4787
749 # 'Treat' all problem characters by passing filename through preferredencoding
750             # to work around encoding issues with subprocess on Python 2 @ Windows
751 if sys.version_info < (3, 0) and sys.platform == 'win32':
752 filename = encodeFilename(filename, True).decode(preferredencoding())
753 return sanitize_path(filename)
754 except ValueError as err:
755 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
756 return None
757
758 def _match_entry(self, info_dict, incomplete):
759 """ Returns None if the file should be downloaded """
760
761 video_title = info_dict.get('title', info_dict.get('id', 'video'))
762 if 'title' in info_dict:
763 # This can happen when we're just evaluating the playlist
764 title = info_dict['title']
765 matchtitle = self.params.get('matchtitle', False)
766 if matchtitle:
767 if not re.search(matchtitle, title, re.IGNORECASE):
768 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
769 rejecttitle = self.params.get('rejecttitle', False)
770 if rejecttitle:
771 if re.search(rejecttitle, title, re.IGNORECASE):
772 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
773 date = info_dict.get('upload_date')
774 if date is not None:
775 dateRange = self.params.get('daterange', DateRange())
776 if date not in dateRange:
777 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
778 view_count = info_dict.get('view_count')
779 if view_count is not None:
780 min_views = self.params.get('min_views')
781 if min_views is not None and view_count < min_views:
782 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
783 max_views = self.params.get('max_views')
784 if max_views is not None and view_count > max_views:
785 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
786 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
787 return 'Skipping "%s" because it is age restricted' % video_title
788 if self.in_download_archive(info_dict):
789 return '%s has already been recorded in archive' % video_title
790
791 if not incomplete:
792 match_filter = self.params.get('match_filter')
793 if match_filter is not None:
794 ret = match_filter(info_dict)
795 if ret is not None:
796 return ret
797
798 return None
799
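    # A sketch of a 'match_filter' callable (see the option documentation
    # above): return None to accept the video, or a string explaining the skip.
    #
    #     def filter_long_videos(info_dict):
    #         duration = info_dict.get('duration')
    #         if duration and duration > 3600:
    #             return 'Skipping %s: longer than an hour' % info_dict.get('title')
    #         return None
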
800 @staticmethod
801 def add_extra_info(info_dict, extra_info):
802 '''Set the keys from extra_info in info dict if they are missing'''
803 for key, value in extra_info.items():
804 info_dict.setdefault(key, value)
805
806 def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
807 process=True, force_generic_extractor=False):
808 '''
809 Returns a list with a dictionary for each video we find.
810 If 'download', also downloads the videos.
811 extra_info is a dict containing the extra values to add to each result
812 '''
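        # A typical metadata-only probe (sketch):
        #     info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc',
        #                             download=False)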
813
814 if not ie_key and force_generic_extractor:
815 ie_key = 'Generic'
816
817 if ie_key:
818 ies = [self.get_info_extractor(ie_key)]
819 else:
820 ies = self._ies
821
822 for ie in ies:
823 if not ie.suitable(url):
824 continue
825
826 ie_key = ie.ie_key()
827 ie = self.get_info_extractor(ie_key)
828 if not ie.working():
829 self.report_warning('The program functionality for this site has been marked as broken, '
830 'and will probably not work.')
831
832 try:
833 temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
834 except (AssertionError, IndexError, AttributeError):
835 temp_id = None
836 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
837 self.to_screen("[%s] %s: has already been recorded in archive" % (
838 ie_key, temp_id))
839 break
840
841 return self.__extract_info(url, ie, download, extra_info, process, info_dict)
842
843 else:
844 self.report_error('no suitable InfoExtractor for URL %s' % url)
845
846 def __handle_extraction_exceptions(func):
847 def wrapper(self, *args, **kwargs):
848 try:
849 return func(self, *args, **kwargs)
850 except GeoRestrictedError as e:
851 msg = e.msg
852 if e.countries:
853 msg += '\nThis video is available in %s.' % ', '.join(
854 map(ISO3166Utils.short2full, e.countries))
855                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
856 self.report_error(msg)
857 except ExtractorError as e: # An error we somewhat expected
858 self.report_error(compat_str(e), e.format_traceback())
859 except MaxDownloadsReached:
860 raise
861 except Exception as e:
862 if self.params.get('ignoreerrors', False):
863 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
864 else:
865 raise
866 return wrapper
867
868 @__handle_extraction_exceptions
869 def __extract_info(self, url, ie, download, extra_info, process, info_dict):
870 ie_result = ie.extract(url)
871 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
872 return
873 if isinstance(ie_result, list):
874 # Backwards compatibility: old IE result format
875 ie_result = {
876 '_type': 'compat_list',
877 'entries': ie_result,
878 }
879 if info_dict:
880 if info_dict.get('id'):
881 ie_result['id'] = info_dict['id']
882 if info_dict.get('title'):
883 ie_result['title'] = info_dict['title']
884 self.add_default_extra_info(ie_result, ie, url)
885 if process:
886 return self.process_ie_result(ie_result, download, extra_info)
887 else:
888 return ie_result
889
890 def add_default_extra_info(self, ie_result, ie, url):
891 self.add_extra_info(ie_result, {
892 'extractor': ie.IE_NAME,
893 'webpage_url': url,
894 'webpage_url_basename': url_basename(url),
895 'extractor_key': ie.ie_key(),
896 })
897
898 def process_ie_result(self, ie_result, download=True, extra_info={}):
899 """
900         Take the result of the ie (may be modified) and resolve all unresolved
901 references (URLs, playlist items).
902
903 It will also download the videos if 'download'.
904 Returns the resolved ie_result.
905 """
906 result_type = ie_result.get('_type', 'video')
907
908 if result_type in ('url', 'url_transparent'):
909 ie_result['url'] = sanitize_url(ie_result['url'])
910 extract_flat = self.params.get('extract_flat', False)
911 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
912 or extract_flat is True):
913 self.__forced_printings(
914 ie_result, self.prepare_filename(ie_result),
915 incomplete=True)
916 return ie_result
917
918 if result_type == 'video':
919 self.add_extra_info(ie_result, extra_info)
920 return self.process_video_result(ie_result, download=download)
921 elif result_type == 'url':
922 # We have to add extra_info to the results because it may be
923 # contained in a playlist
924 return self.extract_info(ie_result['url'],
925 download, info_dict=ie_result,
926 ie_key=ie_result.get('ie_key'),
927 extra_info=extra_info)
928 elif result_type == 'url_transparent':
929 # Use the information from the embedding page
930 info = self.extract_info(
931 ie_result['url'], ie_key=ie_result.get('ie_key'),
932 extra_info=extra_info, download=False, process=False)
933
934 # extract_info may return None when ignoreerrors is enabled and
935 # extraction failed with an error, don't crash and return early
936 # in this case
937 if not info:
938 return info
939
940 force_properties = dict(
941 (k, v) for k, v in ie_result.items() if v is not None)
942 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
943 if f in force_properties:
944 del force_properties[f]
945 new_result = info.copy()
946 new_result.update(force_properties)
947
948             # Extracted info may not be a video result (i.e.
949             # info.get('_type', 'video') != 'video') but rather a url or
950             # url_transparent. In such cases outer metadata (from ie_result)
951 # should be propagated to inner one (info). For this to happen
952 # _type of info should be overridden with url_transparent. This
953 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
954 if new_result.get('_type') == 'url':
955 new_result['_type'] = 'url_transparent'
956
957 return self.process_ie_result(
958 new_result, download=download, extra_info=extra_info)
959 elif result_type in ('playlist', 'multi_video'):
960 # We process each entry in the playlist
961 playlist = ie_result.get('title') or ie_result.get('id')
962 self.to_screen('[download] Downloading playlist: %s' % playlist)
963
964 playlist_results = []
965
966 playliststart = self.params.get('playliststart', 1) - 1
967 playlistend = self.params.get('playlistend')
968 # For backwards compatibility, interpret -1 as whole list
969 if playlistend == -1:
970 playlistend = None
971
972 playlistitems_str = self.params.get('playlist_items')
973 playlistitems = None
974 if playlistitems_str is not None:
975 def iter_playlistitems(format):
976 for string_segment in format.split(','):
977 if '-' in string_segment:
978 start, end = string_segment.split('-')
979 for item in range(int(start), int(end) + 1):
980 yield int(item)
981 else:
982 yield int(string_segment)
983 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
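                # e.g. playlist_items '1-3,7' expands to [1, 2, 3, 7]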
984
985 ie_entries = ie_result['entries']
986
987 def make_playlistitems_entries(list_ie_entries):
988 num_entries = len(list_ie_entries)
989 return [
990 list_ie_entries[i - 1] for i in playlistitems
991 if -num_entries <= i - 1 < num_entries]
992
993 def report_download(num_entries):
994 self.to_screen(
995 '[%s] playlist %s: Downloading %d videos' %
996 (ie_result['extractor'], playlist, num_entries))
997
998 if isinstance(ie_entries, list):
999 n_all_entries = len(ie_entries)
1000 if playlistitems:
1001 entries = make_playlistitems_entries(ie_entries)
1002 else:
1003 entries = ie_entries[playliststart:playlistend]
1004 n_entries = len(entries)
1005 self.to_screen(
1006 '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
1007 (ie_result['extractor'], playlist, n_all_entries, n_entries))
1008 elif isinstance(ie_entries, PagedList):
1009 if playlistitems:
1010 entries = []
1011 for item in playlistitems:
1012 entries.extend(ie_entries.getslice(
1013 item - 1, item
1014 ))
1015 else:
1016 entries = ie_entries.getslice(
1017 playliststart, playlistend)
1018 n_entries = len(entries)
1019 report_download(n_entries)
1020 else: # iterable
1021 if playlistitems:
1022 entries = make_playlistitems_entries(list(itertools.islice(
1023 ie_entries, 0, max(playlistitems))))
1024 else:
1025 entries = list(itertools.islice(
1026 ie_entries, playliststart, playlistend))
1027 n_entries = len(entries)
1028 report_download(n_entries)
1029
1030 if self.params.get('playlistreverse', False):
1031 entries = entries[::-1]
1032
1033 if self.params.get('playlistrandom', False):
1034 random.shuffle(entries)
1035
1036 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1037
1038 for i, entry in enumerate(entries, 1):
1039 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1040 # This __x_forwarded_for_ip thing is a bit ugly but requires
1041 # minimal changes
1042 if x_forwarded_for:
1043 entry['__x_forwarded_for_ip'] = x_forwarded_for
1044 extra = {
1045 'n_entries': n_entries,
1046 'playlist': playlist,
1047 'playlist_id': ie_result.get('id'),
1048 'playlist_title': ie_result.get('title'),
1049 'playlist_uploader': ie_result.get('uploader'),
1050 'playlist_uploader_id': ie_result.get('uploader_id'),
1051 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
1052 'extractor': ie_result['extractor'],
1053 'webpage_url': ie_result['webpage_url'],
1054 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1055 'extractor_key': ie_result['extractor_key'],
1056 }
1057
1058 reason = self._match_entry(entry, incomplete=True)
1059 if reason is not None:
1060                     if reason.endswith('has already been recorded in archive') and self.params.get('break_on_existing'):
1061                         self.to_screen('[download] Tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
1062 break
1063 else:
1064 self.to_screen('[download] ' + reason)
1065 continue
1066
1067 entry_result = self.__process_iterable_entry(entry, download, extra)
1068 # TODO: skip failed (empty) entries?
1069 playlist_results.append(entry_result)
1070 ie_result['entries'] = playlist_results
1071 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1072 return ie_result
1073 elif result_type == 'compat_list':
1074 self.report_warning(
1075 'Extractor %s returned a compat_list result. '
1076 'It needs to be updated.' % ie_result.get('extractor'))
1077
1078 def _fixup(r):
1079 self.add_extra_info(
1080 r,
1081 {
1082 'extractor': ie_result['extractor'],
1083 'webpage_url': ie_result['webpage_url'],
1084 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1085 'extractor_key': ie_result['extractor_key'],
1086 }
1087 )
1088 return r
1089 ie_result['entries'] = [
1090 self.process_ie_result(_fixup(r), download, extra_info)
1091 for r in ie_result['entries']
1092 ]
1093 return ie_result
1094 else:
1095 raise Exception('Invalid result type: %s' % result_type)
1096
1097 @__handle_extraction_exceptions
1098 def __process_iterable_entry(self, entry, download, extra_info):
1099 return self.process_ie_result(
1100 entry, download=download, extra_info=extra_info)
1101
1102 def _build_format_filter(self, filter_spec):
1103 " Returns a function to filter the formats according to the filter_spec "
1104
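        # Examples of filter specs this accepts (a '?' after the operator also
        # matches formats where the field is unknown):
        #   height<=720       numeric comparison
        #   filesize>10M      value parsed with parse_filesize
        #   ext=mp4           string equality
        #   format_id!^=hls   negated "starts with" match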
1105 OPERATORS = {
1106 '<': operator.lt,
1107 '<=': operator.le,
1108 '>': operator.gt,
1109 '>=': operator.ge,
1110 '=': operator.eq,
1111 '!=': operator.ne,
1112 }
1113 operator_rex = re.compile(r'''(?x)\s*
1114 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1115 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1116 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1117 $
1118 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1119 m = operator_rex.search(filter_spec)
1120 if m:
1121 try:
1122 comparison_value = int(m.group('value'))
1123 except ValueError:
1124 comparison_value = parse_filesize(m.group('value'))
1125 if comparison_value is None:
1126 comparison_value = parse_filesize(m.group('value') + 'B')
1127 if comparison_value is None:
1128 raise ValueError(
1129 'Invalid value %r in format specification %r' % (
1130 m.group('value'), filter_spec))
1131 op = OPERATORS[m.group('op')]
1132
1133 if not m:
1134 STR_OPERATORS = {
1135 '=': operator.eq,
1136 '^=': lambda attr, value: attr.startswith(value),
1137 '$=': lambda attr, value: attr.endswith(value),
1138 '*=': lambda attr, value: value in attr,
1139 }
1140 str_operator_rex = re.compile(r'''(?x)
1141 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
1142 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1143 \s*(?P<value>[a-zA-Z0-9._-]+)
1144 \s*$
1145 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1146 m = str_operator_rex.search(filter_spec)
1147 if m:
1148 comparison_value = m.group('value')
1149 str_op = STR_OPERATORS[m.group('op')]
1150 if m.group('negation'):
1151 op = lambda attr, value: not str_op(attr, value)
1152 else:
1153 op = str_op
1154
1155 if not m:
1156 raise ValueError('Invalid filter specification %r' % filter_spec)
1157
1158 def _filter(f):
1159 actual_value = f.get(m.group('key'))
1160 if actual_value is None:
1161 return m.group('none_inclusive')
1162 return op(actual_value, comparison_value)
1163 return _filter
1164
1165 def _default_format_spec(self, info_dict, download=True):
1166
1167 def can_merge():
1168 merger = FFmpegMergerPP(self)
1169 return merger.available and merger.can_merge()
1170
1171 def prefer_best():
1172 if self.params.get('simulate', False):
1173 return False
1174 if not download:
1175 return False
1176 if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
1177 return True
1178 if info_dict.get('is_live'):
1179 return True
1180 if not can_merge():
1181 return True
1182 return False
1183
1184 req_format_list = ['bestvideo+bestaudio', 'best']
1185 if prefer_best():
1186 req_format_list.reverse()
1187 return '/'.join(req_format_list)
1188
1189 def build_format_selector(self, format_spec):
1190 def syntax_error(note, start):
1191 message = (
1192 'Invalid format specification: '
1193 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1194 return SyntaxError(message)
1195
1196 PICKFIRST = 'PICKFIRST'
1197 MERGE = 'MERGE'
1198 SINGLE = 'SINGLE'
1199 GROUP = 'GROUP'
1200 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1201
1202 def _parse_filter(tokens):
1203 filter_parts = []
1204 for type, string, start, _, _ in tokens:
1205 if type == tokenize.OP and string == ']':
1206 return ''.join(filter_parts)
1207 else:
1208 filter_parts.append(string)
1209
1210 def _remove_unused_ops(tokens):
1211 # Remove operators that we don't use and join them with the surrounding strings
1212 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1213 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1214 last_string, last_start, last_end, last_line = None, None, None, None
1215 for type, string, start, end, line in tokens:
1216 if type == tokenize.OP and string == '[':
1217 if last_string:
1218 yield tokenize.NAME, last_string, last_start, last_end, last_line
1219 last_string = None
1220 yield type, string, start, end, line
1221 # everything inside brackets will be handled by _parse_filter
1222 for type, string, start, end, line in tokens:
1223 yield type, string, start, end, line
1224 if type == tokenize.OP and string == ']':
1225 break
1226 elif type == tokenize.OP and string in ALLOWED_OPS:
1227 if last_string:
1228 yield tokenize.NAME, last_string, last_start, last_end, last_line
1229 last_string = None
1230 yield type, string, start, end, line
1231 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1232 if not last_string:
1233 last_string = string
1234 last_start = start
1235 last_end = end
1236 else:
1237 last_string += string
1238 if last_string:
1239 yield tokenize.NAME, last_string, last_start, last_end, last_line
1240
1241 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1242 selectors = []
1243 current_selector = None
1244 for type, string, start, _, _ in tokens:
1245 # ENCODING is only defined in python 3.x
1246 if type == getattr(tokenize, 'ENCODING', None):
1247 continue
1248 elif type in [tokenize.NAME, tokenize.NUMBER]:
1249 current_selector = FormatSelector(SINGLE, string, [])
1250 elif type == tokenize.OP:
1251 if string == ')':
1252 if not inside_group:
1253 # ')' will be handled by the parentheses group
1254 tokens.restore_last_token()
1255 break
1256 elif inside_merge and string in ['/', ',']:
1257 tokens.restore_last_token()
1258 break
1259 elif inside_choice and string == ',':
1260 tokens.restore_last_token()
1261 break
1262 elif string == ',':
1263 if not current_selector:
1264 raise syntax_error('"," must follow a format selector', start)
1265 selectors.append(current_selector)
1266 current_selector = None
1267 elif string == '/':
1268 if not current_selector:
1269 raise syntax_error('"/" must follow a format selector', start)
1270 first_choice = current_selector
1271 second_choice = _parse_format_selection(tokens, inside_choice=True)
1272 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1273 elif string == '[':
1274 if not current_selector:
1275 current_selector = FormatSelector(SINGLE, 'best', [])
1276 format_filter = _parse_filter(tokens)
1277 current_selector.filters.append(format_filter)
1278 elif string == '(':
1279 if current_selector:
1280 raise syntax_error('Unexpected "("', start)
1281 group = _parse_format_selection(tokens, inside_group=True)
1282 current_selector = FormatSelector(GROUP, group, [])
1283 elif string == '+':
1284 if not current_selector:
1285 raise syntax_error('Unexpected "+"', start)
1286 selector_1 = current_selector
1287 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1288 if not selector_2:
1289 raise syntax_error('Expected a selector', start)
1290 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1291 else:
1292 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1293 elif type == tokenize.ENDMARKER:
1294 break
1295 if current_selector:
1296 selectors.append(current_selector)
1297 return selectors
1298
1299 def _build_selector_function(selector):
1300 if isinstance(selector, list):
1301 fs = [_build_selector_function(s) for s in selector]
1302
1303 def selector_function(ctx):
1304 for f in fs:
1305 for format in f(ctx):
1306 yield format
1307 return selector_function
1308 elif selector.type == GROUP:
1309 selector_function = _build_selector_function(selector.selector)
1310 elif selector.type == PICKFIRST:
1311 fs = [_build_selector_function(s) for s in selector.selector]
1312
1313 def selector_function(ctx):
1314 for f in fs:
1315 picked_formats = list(f(ctx))
1316 if picked_formats:
1317 return picked_formats
1318 return []
1319 elif selector.type == SINGLE:
1320 format_spec = selector.selector
1321
1322 def selector_function(ctx):
1323 formats = list(ctx['formats'])
1324 if not formats:
1325 return
1326 if format_spec == 'all':
1327 for f in formats:
1328 yield f
1329 elif format_spec in ['best', 'worst', None]:
1330 format_idx = 0 if format_spec == 'worst' else -1
1331 audiovideo_formats = [
1332 f for f in formats
1333 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1334 if audiovideo_formats:
1335 yield audiovideo_formats[format_idx]
1336 # for extractors with incomplete formats (audio only (soundcloud)
1337                     # or video only (imgur)) we will fall back to the best/worst
1338 # {video,audio}-only format
1339 elif ctx['incomplete_formats']:
1340 yield formats[format_idx]
1341 elif format_spec == 'bestaudio':
1342 audio_formats = [
1343 f for f in formats
1344 if f.get('vcodec') == 'none']
1345 if audio_formats:
1346 yield audio_formats[-1]
1347 elif format_spec == 'worstaudio':
1348 audio_formats = [
1349 f for f in formats
1350 if f.get('vcodec') == 'none']
1351 if audio_formats:
1352 yield audio_formats[0]
1353 elif format_spec == 'bestvideo':
1354 video_formats = [
1355 f for f in formats
1356 if f.get('acodec') == 'none']
1357 if video_formats:
1358 yield video_formats[-1]
1359 elif format_spec == 'worstvideo':
1360 video_formats = [
1361 f for f in formats
1362 if f.get('acodec') == 'none']
1363 if video_formats:
1364 yield video_formats[0]
1365 else:
1366 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1367 if format_spec in extensions:
1368 filter_f = lambda f: f['ext'] == format_spec
1369 else:
1370 filter_f = lambda f: f['format_id'] == format_spec
1371 matches = list(filter(filter_f, formats))
1372 if matches:
1373 yield matches[-1]
1374 elif selector.type == MERGE:
1375 def _merge(formats_pair):
1376 format_1, format_2 = formats_pair
1377
1378 formats_info = []
1379 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1380 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1381
1382 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1383 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1384
1385 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1386 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1387
1388 output_ext = self.params.get('merge_output_format')
1389 if not output_ext:
1390 if the_only_video:
1391 output_ext = the_only_video['ext']
1392 elif the_only_audio and not video_fmts:
1393 output_ext = the_only_audio['ext']
1394 else:
1395 output_ext = 'mkv'
1396
1397 new_dict = {
1398 'requested_formats': formats_info,
1399 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1400 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1401 'ext': output_ext,
1402 }
1403
1404 if the_only_video:
1405 new_dict.update({
1406 'width': the_only_video.get('width'),
1407 'height': the_only_video.get('height'),
1408 'resolution': the_only_video.get('resolution'),
1409 'fps': the_only_video.get('fps'),
1410 'vcodec': the_only_video.get('vcodec'),
1411 'vbr': the_only_video.get('vbr'),
1412 'stretched_ratio': the_only_video.get('stretched_ratio'),
1413 })
1414
1415 if the_only_audio:
1416 new_dict.update({
1417 'acodec': the_only_audio.get('acodec'),
1418 'abr': the_only_audio.get('abr'),
1419 })
1420
1421 return new_dict
1422
1423 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1424
1425 def selector_function(ctx):
1426 for pair in itertools.product(
1427 selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1428 yield _merge(pair)
1429
1430 filters = [self._build_format_filter(f) for f in selector.filters]
1431
1432 def final_selector(ctx):
1433 ctx_copy = copy.deepcopy(ctx)
1434 for _filter in filters:
1435 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1436 return selector_function(ctx_copy)
1437 return final_selector
1438
1439 stream = io.BytesIO(format_spec.encode('utf-8'))
1440 try:
1441 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1442 except tokenize.TokenError:
1443 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1444
1445 class TokenIterator(object):
1446 def __init__(self, tokens):
1447 self.tokens = tokens
1448 self.counter = 0
1449
1450 def __iter__(self):
1451 return self
1452
1453 def __next__(self):
1454 if self.counter >= len(self.tokens):
1455 raise StopIteration()
1456 value = self.tokens[self.counter]
1457 self.counter += 1
1458 return value
1459
1460 next = __next__
1461
1462 def restore_last_token(self):
1463 self.counter -= 1
1464
1465 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1466 return _build_selector_function(parsed_selector)
1467
1468 def _calc_headers(self, info_dict):
1469 res = std_headers.copy()
1470
1471 add_headers = info_dict.get('http_headers')
1472 if add_headers:
1473 res.update(add_headers)
1474
1475 cookies = self._calc_cookies(info_dict)
1476 if cookies:
1477 res['Cookie'] = cookies
1478
1479 if 'X-Forwarded-For' not in res:
1480 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1481 if x_forwarded_for_ip:
1482 res['X-Forwarded-For'] = x_forwarded_for_ip
1483
1484 return res
1485
1486 def _calc_cookies(self, info_dict):
1487 pr = sanitized_Request(info_dict['url'])
1488 self.cookiejar.add_cookie_header(pr)
1489 return pr.get_header('Cookie')
1490
1491 def process_video_result(self, info_dict, download=True):
1492 assert info_dict.get('_type', 'video') == 'video'
1493
1494 if 'id' not in info_dict:
1495 raise ExtractorError('Missing "id" field in extractor result')
1496 if 'title' not in info_dict:
1497 raise ExtractorError('Missing "title" field in extractor result')
1498
1499 def report_force_conversion(field, field_not, conversion):
1500 self.report_warning(
1501 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1502 % (field, field_not, conversion))
1503
1504 def sanitize_string_field(info, string_field):
1505 field = info.get(string_field)
1506 if field is None or isinstance(field, compat_str):
1507 return
1508 report_force_conversion(string_field, 'a string', 'string')
1509 info[string_field] = compat_str(field)
1510
1511 def sanitize_numeric_fields(info):
1512 for numeric_field in self._NUMERIC_FIELDS:
1513 field = info.get(numeric_field)
1514 if field is None or isinstance(field, compat_numeric_types):
1515 continue
1516 report_force_conversion(numeric_field, 'numeric', 'int')
1517 info[numeric_field] = int_or_none(field)
1518
1519 sanitize_string_field(info_dict, 'id')
1520 sanitize_numeric_fields(info_dict)
1521
1522 if 'playlist' not in info_dict:
1523 # It isn't part of a playlist
1524 info_dict['playlist'] = None
1525 info_dict['playlist_index'] = None
1526
1527 thumbnails = info_dict.get('thumbnails')
1528 if thumbnails is None:
1529 thumbnail = info_dict.get('thumbnail')
1530 if thumbnail:
1531 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1532 if thumbnails:
1533 thumbnails.sort(key=lambda t: (
1534 t.get('preference') if t.get('preference') is not None else -1,
1535 t.get('width') if t.get('width') is not None else -1,
1536 t.get('height') if t.get('height') is not None else -1,
1537 t.get('id') if t.get('id') is not None else '', t.get('url')))
1538 for i, t in enumerate(thumbnails):
1539 t['url'] = sanitize_url(t['url'])
1540 if t.get('width') and t.get('height'):
1541 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1542 if t.get('id') is None:
1543 t['id'] = '%d' % i
1544
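# A sketch of the normalization above (hypothetical values): thumbnails are
# sorted by preference, then size, urls are sanitized, and missing ids are
# filled from the list index:
#
#   thumbnails = [{'url': 'http://e.com/hi.jpg', 'width': 1280, 'height': 720},
#                 {'url': 'http://e.com/lo.jpg', 'width': 120, 'height': 90}]
#   # -> the 120x90 entry sorts first; entries gain 'id' ('0', '1') and
#   #    'resolution' ('120x90', '1280x720')
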
1545 if self.params.get('list_thumbnails'):
1546 self.list_thumbnails(info_dict)
1547 return
1548
1549 thumbnail = info_dict.get('thumbnail')
1550 if thumbnail:
1551 info_dict['thumbnail'] = sanitize_url(thumbnail)
1552 elif thumbnails:
1553 info_dict['thumbnail'] = thumbnails[-1]['url']
1554
1555 if 'display_id' not in info_dict and 'id' in info_dict:
1556 info_dict['display_id'] = info_dict['id']
1557
1558 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1559 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1560 # see http://bugs.python.org/issue1646728)
1561 try:
1562 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1563 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1564 except (ValueError, OverflowError, OSError):
1565 pass
1566
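# For instance (sketch): timestamp 1609459200 is 2021-01-01T00:00:00Z, so
# upload_date becomes '20210101'.
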
1567 # Auto generate title fields corresponding to the *_number fields when missing
1568 # in order to always have clean titles. This is very common for TV series.
1569 for field in ('chapter', 'season', 'episode'):
1570 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1571 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1572
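# e.g. (sketch): {'episode_number': 3} with no 'episode' field yields
# info_dict['episode'] = 'Episode 3'; likewise for chapter and season.
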
1573 for cc_kind in ('subtitles', 'automatic_captions'):
1574 cc = info_dict.get(cc_kind)
1575 if cc:
1576 for _, subtitle in cc.items():
1577 for subtitle_format in subtitle:
1578 if subtitle_format.get('url'):
1579 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1580 if subtitle_format.get('ext') is None:
1581 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1582
1583 automatic_captions = info_dict.get('automatic_captions')
1584 subtitles = info_dict.get('subtitles')
1585
1586 if self.params.get('listsubtitles', False):
1587 if 'automatic_captions' in info_dict:
1588 self.list_subtitles(
1589 info_dict['id'], automatic_captions, 'automatic captions')
1590 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1591 return
1592
1593 info_dict['requested_subtitles'] = self.process_subtitles(
1594 info_dict['id'], subtitles, automatic_captions)
1595
1596 # We now pick which formats have to be downloaded
1597 if info_dict.get('formats') is None:
1598 # There's only one format available
1599 formats = [info_dict]
1600 else:
1601 formats = info_dict['formats']
1602
1603 if not formats:
1604 raise ExtractorError('No video formats found!')
1605
1606 def is_wellformed(f):
1607 url = f.get('url')
1608 if not url:
1609 self.report_warning(
1610 '"url" field is missing or empty - skipping format, '
1611 'there is an error in extractor')
1612 return False
1613 if isinstance(url, bytes):
1614 sanitize_string_field(f, 'url')
1615 return True
1616
1617 # Filter out malformed formats for better extraction robustness
1618 formats = list(filter(is_wellformed, formats))
1619
1620 formats_dict = {}
1621
1622 # We check that all the formats have the format and format_id fields
1623 for i, format in enumerate(formats):
1624 sanitize_string_field(format, 'format_id')
1625 sanitize_numeric_fields(format)
1626 format['url'] = sanitize_url(format['url'])
1627 if not format.get('format_id'):
1628 format['format_id'] = compat_str(i)
1629 else:
1630 # Sanitize format_id, replacing characters that are used in format selector expressions
1631 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1632 format_id = format['format_id']
1633 if format_id not in formats_dict:
1634 formats_dict[format_id] = []
1635 formats_dict[format_id].append(format)
1636
1637 # Make sure all formats have unique format_id
1638 for format_id, ambiguous_formats in formats_dict.items():
1639 if len(ambiguous_formats) > 1:
1640 for i, format in enumerate(ambiguous_formats):
1641 format['format_id'] = '%s-%d' % (format_id, i)
1642
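# Sketch (hypothetical ids): if an extractor returned two formats both
# tagged 'hls', they leave the loop above as 'hls-0' and 'hls-1'.
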
1643 for i, format in enumerate(formats):
1644 if format.get('format') is None:
1645 format['format'] = '{id} - {res}{note}'.format(
1646 id=format['format_id'],
1647 res=self.format_resolution(format),
1648 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1649 )
1650 # Automatically determine file extension if missing
1651 if format.get('ext') is None:
1652 format['ext'] = determine_ext(format['url']).lower()
1653 # Automatically determine protocol if missing (useful for format
1654 # selection purposes)
1655 if format.get('protocol') is None:
1656 format['protocol'] = determine_protocol(format)
1657 # Add HTTP headers, so that external programs can use them from the
1658 # json output
1659 full_format_info = info_dict.copy()
1660 full_format_info.update(format)
1661 format['http_headers'] = self._calc_headers(full_format_info)
1662 # Remove private housekeeping stuff
1663 if '__x_forwarded_for_ip' in info_dict:
1664 del info_dict['__x_forwarded_for_ip']
1665
1666 # TODO Central sorting goes here
1667
1668 if formats[0] is not info_dict:
1669 # only set the 'formats' field if the original info_dict lists them;
1670 # otherwise we would end up with a circular reference: the first (and
1671 # only) element of the 'formats' field in info_dict would be info_dict
1672 # itself, which can't be exported to json
1673 info_dict['formats'] = formats
1674 if self.params.get('listformats'):
1675 self.list_formats(info_dict)
1676 return
1677
1678 req_format = self.params.get('format')
1679 if req_format is None:
1680 req_format = self._default_format_spec(info_dict, download=download)
1681 if self.params.get('verbose'):
1682 self.to_stdout('[debug] Default format spec: %s' % req_format)
1683
1684 format_selector = self.build_format_selector(req_format)
1685
1686 # During format selection we may need access to the original format set
1687 # in order to calculate some metrics or do some processing.
1688 # For now we need to be able to guess whether the original formats
1689 # provided by the extractor are incomplete (i.e. whether the extractor
1690 # provides only video-only or audio-only formats) so that format
1691 # selection works properly for extractors with such incomplete formats
1692 # (see https://github.com/ytdl-org/youtube-dl/pull/5556).
1693 # Since formats may be filtered during format selection and may no longer
1694 # match the original formats, the results may be incorrect. Thus the
1695 # original formats, or pre-calculated metrics, should be passed to the
1696 # format selection routines as well.
1697 # We therefore pass a context object containing all the necessary
1698 # additional data instead of just the formats.
1699 # This fixes the incorrect format selection issue described in
1700 # https://github.com/ytdl-org/youtube-dl/issues/10083.
1701 incomplete_formats = (
1702 # All formats are video-only or
1703 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1704 # all formats are audio-only
1705 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1706
1707 ctx = {
1708 'formats': formats,
1709 'incomplete_formats': incomplete_formats,
1710 }
1711
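# Sketch of the context consumed by the selector (hypothetical data):
#
#   ctx = {'formats': [{'format_id': '137', 'vcodec': 'avc1', 'acodec': 'none'},
#                      {'format_id': '140', 'vcodec': 'none', 'acodec': 'mp4a'}],
#          'incomplete_formats': False}
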
1712 formats_to_download = list(format_selector(ctx))
1713 if not formats_to_download:
1714 raise ExtractorError('requested format not available',
1715 expected=True)
1716
1717 if download:
1718 if len(formats_to_download) > 1:
1719 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1720 for format in formats_to_download:
1721 new_info = dict(info_dict)
1722 new_info.update(format)
1723 self.process_info(new_info)
1724 # We update the info dict with the best quality format (backwards compatibility)
1725 info_dict.update(formats_to_download[-1])
1726 return info_dict
1727
1728 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1729 """Select the requested subtitles and their format"""
1730 available_subs = {}
1731 if normal_subtitles and self.params.get('writesubtitles'):
1732 available_subs.update(normal_subtitles)
1733 if automatic_captions and self.params.get('writeautomaticsub'):
1734 for lang, cap_info in automatic_captions.items():
1735 if lang not in available_subs:
1736 available_subs[lang] = cap_info
1737
1738 if ((not self.params.get('writesubtitles')
1739 and not self.params.get('writeautomaticsub'))
1740 or not available_subs):
1741 return None
1742
1743 if self.params.get('allsubtitles', False):
1744 requested_langs = available_subs.keys()
1745 else:
1746 if self.params.get('subtitleslangs', False):
1747 requested_langs = self.params.get('subtitleslangs')
1748 elif 'en' in available_subs:
1749 requested_langs = ['en']
1750 else:
1751 requested_langs = [list(available_subs.keys())[0]]
1752
1753 formats_query = self.params.get('subtitlesformat', 'best')
1754 formats_preference = formats_query.split('/') if formats_query else []
1755 subs = {}
1756 for lang in requested_langs:
1757 formats = available_subs.get(lang)
1758 if formats is None:
1759 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1760 continue
1761 for ext in formats_preference:
1762 if ext == 'best':
1763 f = formats[-1]
1764 break
1765 matches = list(filter(lambda f: f['ext'] == ext, formats))
1766 if matches:
1767 f = matches[-1]
1768 break
1769 else:
1770 f = formats[-1]
1771 self.report_warning(
1772 'No subtitle format found matching "%s" for language %s, '
1773 'using %s' % (formats_query, lang, f['ext']))
1774 subs[lang] = f
1775 return subs
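
# Sketch (hypothetical params): with {'writesubtitles': True,
# 'subtitleslangs': ['en'], 'subtitlesformat': 'srt/best'} and available
# subs {'en': [{'ext': 'vtt'}, {'ext': 'srt'}]}, the 'srt' entry wins;
# were no 'srt' present, the 'best' fallback would pick the last listed
# format. The warning path only triggers when no preference matches at all.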
1776
1777 def __forced_printings(self, info_dict, filename, incomplete):
1778 def print_mandatory(field):
1779 if (self.params.get('force%s' % field, False)
1780 and (not incomplete or info_dict.get(field) is not None)):
1781 self.to_stdout(info_dict[field])
1782
1783 def print_optional(field):
1784 if (self.params.get('force%s' % field, False)
1785 and info_dict.get(field) is not None):
1786 self.to_stdout(info_dict[field])
1787
1788 print_mandatory('title')
1789 print_mandatory('id')
1790 if self.params.get('forceurl', False) and not incomplete:
1791 if info_dict.get('requested_formats') is not None:
1792 for f in info_dict['requested_formats']:
1793 self.to_stdout(f['url'] + f.get('play_path', ''))
1794 else:
1795 # For RTMP URLs, also include the playpath
1796 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1797 print_optional('thumbnail')
1798 print_optional('description')
1799 if self.params.get('forcefilename', False) and filename is not None:
1800 self.to_stdout(filename)
1801 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1802 self.to_stdout(formatSeconds(info_dict['duration']))
1803 print_mandatory('format')
1804 if self.params.get('forcejson', False):
1805 self.to_stdout(json.dumps(info_dict))
1806
1807 def process_info(self, info_dict):
1808 """Process a single resolved IE result."""
1809
1810 assert info_dict.get('_type', 'video') == 'video'
1811
1812 max_downloads = self.params.get('max_downloads')
1813 if max_downloads is not None:
1814 if self._num_downloads >= int(max_downloads):
1815 raise MaxDownloadsReached()
1816
1817 # TODO: backward compatibility, to be removed
1818 info_dict['fulltitle'] = info_dict['title']
1819
1820 if 'format' not in info_dict:
1821 info_dict['format'] = info_dict['ext']
1822
1823 reason = self._match_entry(info_dict, incomplete=False)
1824 if reason is not None:
1825 self.to_screen('[download] ' + reason)
1826 return
1827
1828 self._num_downloads += 1
1829
1830 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1831
1832 # Forced printings
1833 self.__forced_printings(info_dict, filename, incomplete=False)
1834
1835 # Do nothing else if in simulate mode
1836 if self.params.get('simulate', False):
1837 return
1838
1839 if filename is None:
1840 return
1841
1842 def ensure_dir_exists(path):
1843 try:
1844 dn = os.path.dirname(path)
1845 if dn and not os.path.exists(dn):
1846 os.makedirs(dn)
1847 return True
1848 except (OSError, IOError) as err:
1849 self.report_error('unable to create directory ' + error_to_compat_str(err))
1850 return False
1851
1852 if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
1853 return
1854
1855 if self.params.get('writedescription', False):
1856 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1857 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1858 self.to_screen('[info] Video description is already present')
1859 elif info_dict.get('description') is None:
1860 self.report_warning('There\'s no description to write.')
1861 else:
1862 try:
1863 self.to_screen('[info] Writing video description to: ' + descfn)
1864 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1865 descfile.write(info_dict['description'])
1866 except (OSError, IOError):
1867 self.report_error('Cannot write description file ' + descfn)
1868 return
1869
1870 if self.params.get('writeannotations', False):
1871 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1872 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1873 self.to_screen('[info] Video annotations are already present')
1874 elif not info_dict.get('annotations'):
1875 self.report_warning('There are no annotations to write.')
1876 else:
1877 try:
1878 self.to_screen('[info] Writing video annotations to: ' + annofn)
1879 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1880 annofile.write(info_dict['annotations'])
1881 except (KeyError, TypeError):
1882 self.report_warning('There are no annotations to write.')
1883 except (OSError, IOError):
1884 self.report_error('Cannot write annotations file: ' + annofn)
1885 return
1886
1887 def dl(name, info, subtitle=False):
1888 fd = get_suitable_downloader(info, self.params)(self, self.params)
1889 for ph in self._progress_hooks:
1890 fd.add_progress_hook(ph)
1891 if self.params.get('verbose'):
1892 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1893 return fd.download(name, info, subtitle)
1894
1895 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1896 self.params.get('writeautomaticsub')])
1897
1898 if subtitles_are_requested and info_dict.get('requested_subtitles'):
1899 # Subtitle download errors are already handled as non-fatal in the
1900 # relevant IE, so this silently carries on for IEs without support
1901 subtitles = info_dict['requested_subtitles']
1903 for sub_lang, sub_info in subtitles.items():
1904 sub_format = sub_info['ext']
1905 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
1906 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1907 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
1908 else:
1909 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1910 if sub_info.get('data') is not None:
1911 try:
1912 # Use newline='' to prevent conversion of newline characters
1913 # See https://github.com/ytdl-org/youtube-dl/issues/10268
1914 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1915 subfile.write(sub_info['data'])
1916 except (OSError, IOError):
1917 self.report_error('Cannot write subtitles file ' + sub_filename)
1918 return
1919 else:
1920 try:
1921 dl(sub_filename, sub_info, subtitle=True)
1931 except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1932 self.report_warning('Unable to download subtitle for "%s": %s' %
1933 (sub_lang, error_to_compat_str(err)))
1934 continue
1935
1936 if self.params.get('skip_download', False):
1937 if self.params.get('convertsubtitles', False):
1938 subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
1939 filename_real_ext = os.path.splitext(filename)[1][1:]
1940 filename_wo_ext = (
1941 os.path.splitext(filename)[0]
1942 if filename_real_ext == info_dict['ext']
1943 else filename)
1944 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
1945 if subconv.available:
1946 info_dict.setdefault('__postprocessors', [])
1947 # info_dict['__postprocessors'].append(subconv)
1948 if os.path.exists(encodeFilename(afilename)):
1949 self.to_screen(
1950 '[download] %s has already been downloaded and '
1951 'converted' % afilename)
1952 else:
1953 try:
1954 self.post_process(filename, info_dict)
1955 except (PostProcessingError) as err:
1956 self.report_error('postprocessing: %s' % str(err))
1957 return
1958
1959 if self.params.get('writeinfojson', False):
1960 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1961 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1962 self.to_screen('[info] Video description metadata is already present')
1963 else:
1964 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1965 try:
1966 write_json_file(self.filter_requested_info(info_dict), infofn)
1967 except (OSError, IOError):
1968 self.report_error('Cannot write metadata to JSON file ' + infofn)
1969 return
1970
1971 self._write_thumbnails(info_dict, filename)
1972
1973 if not self.params.get('skip_download', False):
1974 try:
1975 if info_dict.get('requested_formats') is not None:
1976 downloaded = []
1977 success = True
1978 merger = FFmpegMergerPP(self)
1979 if not merger.available:
1980 postprocessors = []
1981 self.report_warning('You have requested multiple '
1982 'formats, but neither ffmpeg nor avconv is installed.'
1983 ' The formats won\'t be merged.')
1984 else:
1985 postprocessors = [merger]
1986
1987 def compatible_formats(formats):
1988 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
1989 video_formats = [format for format in formats if format.get('vcodec') != 'none']
1990 audio_formats = [format for format in formats if format.get('acodec') != 'none']
1991 if len(video_formats) > 2 or len(audio_formats) > 2:
1992 return False
1993
1994 # Check extension
1995 exts = set(format.get('ext') for format in formats)
1996 COMPATIBLE_EXTS = (
1997 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
1998 set(('webm',)),
1999 )
2000 for ext_sets in COMPATIBLE_EXTS:
2001 if ext_sets.issuperset(exts):
2002 return True
2003 # TODO: Check acodec/vcodec
2004 return False
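
# Sketch: an mp4 video track plus an m4a audio track share the first
# COMPATIBLE_EXTS set, so they can be merged as-is; mp4 video with webm
# audio cannot, and the code below falls back to an mkv container.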
2005
2006 filename_real_ext = os.path.splitext(filename)[1][1:]
2007 filename_wo_ext = (
2008 os.path.splitext(filename)[0]
2009 if filename_real_ext == info_dict['ext']
2010 else filename)
2011 requested_formats = info_dict['requested_formats']
2012 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2013 info_dict['ext'] = 'mkv'
2014 self.report_warning(
2015 'Requested formats are incompatible for merge and will be merged into mkv.')
2016 # Ensure filename always has a correct extension for successful merge
2017 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
2018 if os.path.exists(encodeFilename(filename)):
2019 self.to_screen(
2020 '[download] %s has already been downloaded and '
2021 'merged' % filename)
2022 else:
2023 for f in requested_formats:
2024 new_info = dict(info_dict)
2025 new_info.update(f)
2026 fname = prepend_extension(
2027 self.prepare_filename(new_info),
2028 'f%s' % f['format_id'], new_info['ext'])
2029 if not ensure_dir_exists(fname):
2030 return
2031 downloaded.append(fname)
2032 partial_success = dl(fname, new_info)
2033 success = success and partial_success
2034 info_dict['__postprocessors'] = postprocessors
2035 info_dict['__files_to_merge'] = downloaded
2036 else:
2037 # Just a single file
2038 success = dl(filename, info_dict)
2039 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2040 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2041 return
2042 except (OSError, IOError) as err:
2043 raise UnavailableVideoError(err)
2044 except (ContentTooShortError, ) as err:
2045 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2046 return
2047
2048 if success and filename != '-':
2049 # Fixup content
2050 fixup_policy = self.params.get('fixup')
2051 if fixup_policy is None:
2052 fixup_policy = 'detect_or_warn'
2053
2054 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
2055
2056 stretched_ratio = info_dict.get('stretched_ratio')
2057 if stretched_ratio is not None and stretched_ratio != 1:
2058 if fixup_policy == 'warn':
2059 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2060 info_dict['id'], stretched_ratio))
2061 elif fixup_policy == 'detect_or_warn':
2062 stretched_pp = FFmpegFixupStretchedPP(self)
2063 if stretched_pp.available:
2064 info_dict.setdefault('__postprocessors', [])
2065 info_dict['__postprocessors'].append(stretched_pp)
2066 else:
2067 self.report_warning(
2068 '%s: Non-uniform pixel ratio (%s). %s'
2069 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2070 else:
2071 assert fixup_policy in ('ignore', 'never')
2072
2073 if (info_dict.get('requested_formats') is None
2074 and info_dict.get('container') == 'm4a_dash'):
2075 if fixup_policy == 'warn':
2076 self.report_warning(
2077 '%s: writing DASH m4a. '
2078 'Only some players support this container.'
2079 % info_dict['id'])
2080 elif fixup_policy == 'detect_or_warn':
2081 fixup_pp = FFmpegFixupM4aPP(self)
2082 if fixup_pp.available:
2083 info_dict.setdefault('__postprocessors', [])
2084 info_dict['__postprocessors'].append(fixup_pp)
2085 else:
2086 self.report_warning(
2087 '%s: writing DASH m4a. '
2088 'Only some players support this container. %s'
2089 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2090 else:
2091 assert fixup_policy in ('ignore', 'never')
2092
2093 if (info_dict.get('protocol') == 'm3u8_native'
2094 or (info_dict.get('protocol') == 'm3u8'
2095 and self.params.get('hls_prefer_native'))):
2096 if fixup_policy == 'warn':
2097 self.report_warning('%s: malformed AAC bitstream detected.' % (
2098 info_dict['id']))
2099 elif fixup_policy == 'detect_or_warn':
2100 fixup_pp = FFmpegFixupM3u8PP(self)
2101 if fixup_pp.available:
2102 info_dict.setdefault('__postprocessors', [])
2103 info_dict['__postprocessors'].append(fixup_pp)
2104 else:
2105 self.report_warning(
2106 '%s: malformed AAC bitstream detected. %s'
2107 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2108 else:
2109 assert fixup_policy in ('ignore', 'never')
2110
2111 try:
2112 self.post_process(filename, info_dict)
2113 except (PostProcessingError) as err:
2114 self.report_error('postprocessing: %s' % str(err))
2115 return
2116 self.record_download_archive(info_dict)
2117
2118 def download(self, url_list):
2119 """Download a given list of URLs."""
2120 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2121 if (len(url_list) > 1
2122 and outtmpl != '-'
2123 and '%' not in outtmpl
2124 and self.params.get('max_downloads') != 1):
2125 raise SameFileError(outtmpl)
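
# e.g. (sketch): two URLs with a fixed template like 'video.mp4' (no '%'
# fields) would both write to the same file, hence the error above;
# outtmpl '-' (stdout) and templates containing '%' are exempt.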
2126
2127 for url in url_list:
2128 try:
2129 # extract_info also performs the actual downloads, not just extraction
2130 res = self.extract_info(
2131 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2132 except UnavailableVideoError:
2133 self.report_error('unable to download video')
2134 except MaxDownloadsReached:
2135 self.to_screen('[info] Maximum number of downloaded files reached.')
2136 raise
2137 else:
2138 if self.params.get('dump_single_json', False):
2139 self.to_stdout(json.dumps(res))
2140
2141 return self._download_retcode
2142
2143 def download_with_info_file(self, info_filename):
2144 with contextlib.closing(fileinput.FileInput(
2145 [info_filename], mode='r',
2146 openhook=fileinput.hook_encoded('utf-8'))) as f:
2147 # FileInput doesn't have a read method, so we can't call json.load
2148 info = self.filter_requested_info(json.loads('\n'.join(f)))
2149 try:
2150 self.process_ie_result(info, download=True)
2151 except DownloadError:
2152 webpage_url = info.get('webpage_url')
2153 if webpage_url is not None:
2154 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2155 return self.download([webpage_url])
2156 else:
2157 raise
2158 return self._download_retcode
2159
2160 @staticmethod
2161 def filter_requested_info(info_dict):
2162 return dict(
2163 (k, v) for k, v in info_dict.items()
2164 if k not in ['requested_formats', 'requested_subtitles'])
2165
2166 def post_process(self, filename, ie_info):
2167 """Run all the postprocessors on the given file."""
2168 info = dict(ie_info)
2169 info['filepath'] = filename
2170 pps_chain = []
2171 if ie_info.get('__postprocessors') is not None:
2172 pps_chain.extend(ie_info['__postprocessors'])
2173 pps_chain.extend(self._pps)
2174 for pp in pps_chain:
2175 files_to_delete = []
2176 try:
2177 files_to_delete, info = pp.run(info)
2178 except PostProcessingError as e:
2179 self.report_error(e.msg)
2180 if files_to_delete and not self.params.get('keepvideo', False):
2181 for old_filename in set(files_to_delete):
2182 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2183 try:
2184 os.remove(encodeFilename(old_filename))
2185 except (IOError, OSError):
2186 self.report_warning('Unable to remove downloaded original file')
2187
2188 def _make_archive_id(self, info_dict):
2189 video_id = info_dict.get('id')
2190 if not video_id:
2191 return
2192 # Lowercase to future-proof against any change in case and to stay
2193 # backwards compatible with archives written by prior versions
2194 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2195 if extractor is None:
2196 url = str_or_none(info_dict.get('url'))
2197 if not url:
2198 return
2199 # Try to find matching extractor for the URL and take its ie_key
2200 for ie in self._ies:
2201 if ie.suitable(url):
2202 extractor = ie.ie_key()
2203 break
2204 else:
2205 return
2206 return extractor.lower() + ' ' + video_id
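
# e.g. a YouTube entry yields an archive id like 'youtube dQw4w9WgXcQ'
# (lowercased extractor key, a space, then the video id)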
2207
2208 def in_download_archive(self, info_dict):
2209 fn = self.params.get('download_archive')
2210 if fn is None:
2211 return False
2212
2213 vid_id = self._make_archive_id(info_dict)
2214 if not vid_id:
2215 return False # Incomplete video information
2216
2217 return vid_id in self.archive
2218
2219 def record_download_archive(self, info_dict):
2220 fn = self.params.get('download_archive')
2221 if fn is None:
2222 return
2223 vid_id = self._make_archive_id(info_dict)
2224 assert vid_id
2225 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2226 archive_file.write(vid_id + '\n')
2227 self.archive.add(vid_id)
2228
2229 @staticmethod
2230 def format_resolution(format, default='unknown'):
2231 if format.get('vcodec') == 'none':
2232 return 'audio only'
2233 if format.get('resolution') is not None:
2234 return format['resolution']
2235 if format.get('height') is not None:
2236 if format.get('width') is not None:
2237 res = '%sx%s' % (format['width'], format['height'])
2238 else:
2239 res = '%sp' % format['height']
2240 elif format.get('width') is not None:
2241 res = '%dx?' % format['width']
2242 else:
2243 res = default
2244 return res
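
# Sketch: {'width': 1920, 'height': 1080} -> '1920x1080';
# {'height': 720} -> '720p'; {'vcodec': 'none'} -> 'audio only'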
2245
2246 def _format_note(self, fdict):
2247 res = ''
2248 if fdict.get('ext') in ['f4f', 'f4m']:
2249 res += '(unsupported) '
2250 if fdict.get('language'):
2251 if res:
2252 res += ' '
2253 res += '[%s] ' % fdict['language']
2254 if fdict.get('format_note') is not None:
2255 res += fdict['format_note'] + ' '
2256 if fdict.get('tbr') is not None:
2257 res += '%4dk ' % fdict['tbr']
2258 if fdict.get('container') is not None:
2259 if res:
2260 res += ', '
2261 res += '%s container' % fdict['container']
2262 if (fdict.get('vcodec') is not None
2263 and fdict.get('vcodec') != 'none'):
2264 if res:
2265 res += ', '
2266 res += fdict['vcodec']
2267 if fdict.get('vbr') is not None:
2268 res += '@'
2269 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2270 res += 'video@'
2271 if fdict.get('vbr') is not None:
2272 res += '%4dk' % fdict['vbr']
2273 if fdict.get('fps') is not None:
2274 if res:
2275 res += ', '
2276 res += '%sfps' % fdict['fps']
2277 if fdict.get('acodec') is not None:
2278 if res:
2279 res += ', '
2280 if fdict['acodec'] == 'none':
2281 res += 'video only'
2282 else:
2283 res += '%-5s' % fdict['acodec']
2284 elif fdict.get('abr') is not None:
2285 if res:
2286 res += ', '
2287 res += 'audio'
2288 if fdict.get('abr') is not None:
2289 res += '@%3dk' % fdict['abr']
2290 if fdict.get('asr') is not None:
2291 res += ' (%5dHz)' % fdict['asr']
2292 if fdict.get('filesize') is not None:
2293 if res:
2294 res += ', '
2295 res += format_bytes(fdict['filesize'])
2296 elif fdict.get('filesize_approx') is not None:
2297 if res:
2298 res += ', '
2299 res += '~' + format_bytes(fdict['filesize_approx'])
2300 return res
2301
2302 def list_formats(self, info_dict):
2303 formats = info_dict.get('formats', [info_dict])
2304 table = [
2305 [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2306 for f in formats
2307 if f.get('preference') is None or f['preference'] >= -1000]
2308 if len(formats) > 1:
2309 table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2310
2311 header_line = ['format code', 'extension', 'resolution', 'note']
2312 self.to_screen(
2313 '[info] Available formats for %s:\n%s' %
2314 (info_dict['id'], render_table(header_line, table)))
2315
2316 def list_thumbnails(self, info_dict):
2317 thumbnails = info_dict.get('thumbnails')
2318 if not thumbnails:
2319 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2320 return
2321
2322 self.to_screen(
2323 '[info] Thumbnails for %s:' % info_dict['id'])
2324 self.to_screen(render_table(
2325 ['ID', 'width', 'height', 'URL'],
2326 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2327
2328 def list_subtitles(self, video_id, subtitles, name='subtitles'):
2329 if not subtitles:
2330 self.to_screen('%s has no %s' % (video_id, name))
2331 return
2332 self.to_screen(
2333 'Available %s for %s:' % (name, video_id))
2334 self.to_screen(render_table(
2335 ['Language', 'formats'],
2336 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2337 for lang, formats in subtitles.items()]))
2338
2339 def urlopen(self, req):
2340 """ Start an HTTP download """
2341 if isinstance(req, compat_basestring):
2342 req = sanitized_Request(req)
2343 return self._opener.open(req, timeout=self._socket_timeout)
2344
2345 def print_debug_header(self):
2346 if not self.params.get('verbose'):
2347 return
2348
2349 if type('') is not compat_str:
2350 # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2351 self.report_warning(
2352 'Your Python is broken! Update to a newer and supported version')
2353
2354 stdout_encoding = getattr(
2355 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2356 encoding_str = (
2357 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2358 locale.getpreferredencoding(),
2359 sys.getfilesystemencoding(),
2360 stdout_encoding,
2361 self.get_encoding()))
2362 write_string(encoding_str, encoding=None)
2363
2364 self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
2365 if _LAZY_LOADER:
2366 self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2367 try:
2368 sp = subprocess.Popen(
2369 ['git', 'rev-parse', '--short', 'HEAD'],
2370 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2371 cwd=os.path.dirname(os.path.abspath(__file__)))
2372 out, err = sp.communicate()
2373 out = out.decode().strip()
2374 if re.match('[0-9a-f]+', out):
2375 self._write_string('[debug] Git HEAD: ' + out + '\n')
2376 except Exception:
2377 try:
2378 sys.exc_clear()
2379 except Exception:
2380 pass
2381
2382 def python_implementation():
2383 impl_name = platform.python_implementation()
2384 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2385 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2386 return impl_name
2387
2388 self._write_string('[debug] Python version %s (%s) - %s\n' % (
2389 platform.python_version(), python_implementation(),
2390 platform_name()))
2391
2392 exe_versions = FFmpegPostProcessor.get_versions(self)
2393 exe_versions['rtmpdump'] = rtmpdump_version()
2394 exe_versions['phantomjs'] = PhantomJSwrapper._version()
2395 exe_str = ', '.join(
2396 '%s %s' % (exe, v)
2397 for exe, v in sorted(exe_versions.items())
2398 if v
2399 )
2400 if not exe_str:
2401 exe_str = 'none'
2402 self._write_string('[debug] exe versions: %s\n' % exe_str)
2403
2404 proxy_map = {}
2405 for handler in self._opener.handlers:
2406 if hasattr(handler, 'proxies'):
2407 proxy_map.update(handler.proxies)
2408 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2409
2410 if self.params.get('call_home', False):
2411 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2412 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2413 latest_version = self.urlopen(
2414 'https://yt-dl.org/latest/version').read().decode('utf-8')
2415 if version_tuple(latest_version) > version_tuple(__version__):
2416 self.report_warning(
2417 'You are using an outdated version (newest version: %s)! '
2418 'See https://yt-dl.org/update if you need help updating.' %
2419 latest_version)
2420
2421 def _setup_opener(self):
2422 timeout_val = self.params.get('socket_timeout')
2423 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2424
2425 opts_cookiefile = self.params.get('cookiefile')
2426 opts_proxy = self.params.get('proxy')
2427
2428 if opts_cookiefile is None:
2429 self.cookiejar = compat_cookiejar.CookieJar()
2430 else:
2431 opts_cookiefile = expand_path(opts_cookiefile)
2432 self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2433 if os.access(opts_cookiefile, os.R_OK):
2434 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2435
2436 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2437 if opts_proxy is not None:
2438 if opts_proxy == '':
2439 proxies = {}
2440 else:
2441 proxies = {'http': opts_proxy, 'https': opts_proxy}
2442 else:
2443 proxies = compat_urllib_request.getproxies()
2444 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2445 if 'http' in proxies and 'https' not in proxies:
2446 proxies['https'] = proxies['http']
2447 proxy_handler = PerRequestProxyHandler(proxies)
2448
2449 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2450 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2451 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2452 redirect_handler = YoutubeDLRedirectHandler()
2453 data_handler = compat_urllib_request_DataHandler()
2454
2455 # When passing our own FileHandler instance, build_opener won't add the
2456 # default FileHandler, which allows us to disable the file protocol; the
2457 # file protocol can be used for malicious purposes (see
2458 # https://github.com/ytdl-org/youtube-dl/issues/8227)
2459 file_handler = compat_urllib_request.FileHandler()
2460
2461 def file_open(*args, **kwargs):
2462 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
2463 file_handler.file_open = file_open
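
# Sketch: with the handler patched above, a request such as
#   self.urlopen('file:///etc/passwd')
# raises URLError instead of reading the local file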
2464
2465 opener = compat_urllib_request.build_opener(
2466 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2467
2468 # Delete the default user-agent header, which would otherwise apply in
2469 # cases where our custom HTTP handler doesn't come into play
2470 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2471 opener.addheaders = []
2472 self._opener = opener
2473
2474 def encode(self, s):
2475 if isinstance(s, bytes):
2476 return s # Already encoded
2477
2478 try:
2479 return s.encode(self.get_encoding())
2480 except UnicodeEncodeError as err:
2481 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2482 raise
2483
2484 def get_encoding(self):
2485 encoding = self.params.get('encoding')
2486 if encoding is None:
2487 encoding = preferredencoding()
2488 return encoding
2489
2490 def _write_thumbnails(self, info_dict, filename):
2491 if self.params.get('writethumbnail', False):
2492 thumbnails = info_dict.get('thumbnails')
2493 if thumbnails:
2494 thumbnails = [thumbnails[-1]]
2495 elif self.params.get('write_all_thumbnails', False):
2496 thumbnails = info_dict.get('thumbnails')
2497 else:
2498 return
2499
2500 if not thumbnails:
2501 # No thumbnails present, so return immediately
2502 return
2503
2504 for t in thumbnails:
2505 thumb_ext = determine_ext(t['url'], 'jpg')
2506 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2507 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2508 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2509
2510 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2511 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2512 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2513 else:
2514 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2515 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2516 try:
2517 uf = self.urlopen(t['url'])
2518 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2519 shutil.copyfileobj(uf, thumbf)
2520 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2521 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2522 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2523 self.report_warning('Unable to download thumbnail "%s": %s' %
2524 (t['url'], error_to_compat_str(err)))