youtube_dlc/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import socket
  23 import sys
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30
  31 from .compat import (
  32     compat_basestring,
  33     compat_cookiejar,
  34     compat_get_terminal_size,
  35     compat_http_client,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_str,
  40     compat_tokenize_tokenize,
  41     compat_urllib_error,
  42     compat_urllib_request,
  43     compat_urllib_request_DataHandler,
  44 )
  45 from .utils import (
  46     age_restricted,
  47     args_to_str,
  48     ContentTooShortError,
  49     date_from_str,
  50     DateRange,
  51     DEFAULT_OUTTMPL,
  52     determine_ext,
  53     determine_protocol,
  54     DOT_DESKTOP_LINK_TEMPLATE,
  55     DOT_URL_LINK_TEMPLATE,
  56     DOT_WEBLOC_LINK_TEMPLATE,
  57     DownloadError,
  58     encode_compat_str,
  59     encodeFilename,
  60     error_to_compat_str,
  61     expand_path,
  62     ExtractorError,
  63     format_bytes,
  64     format_field,
  65     formatSeconds,
  66     GeoRestrictedError,
  67     int_or_none,
  68     iri_to_uri,
  69     ISO3166Utils,
  70     locked_file,
  71     make_HTTPS_handler,
  72     MaxDownloadsReached,
  73     orderedSet,
  74     PagedList,
  75     parse_filesize,
  76     PerRequestProxyHandler,
  77     platform_name,
  78     PostProcessingError,
  79     preferredencoding,
  80     prepend_extension,
  81     register_socks_protocols,
  82     render_table,
  83     replace_extension,
  84     SameFileError,
  85     sanitize_filename,
  86     sanitize_path,
  87     sanitize_url,
  88     sanitized_Request,
  89     std_headers,
  90     str_or_none,
  91     subtitles_filename,
  92     to_high_limit_path,
  93     UnavailableVideoError,
  94     url_basename,
  95     version_tuple,
  96     write_json_file,
  97     write_string,
  98     YoutubeDLCookieJar,
  99     YoutubeDLCookieProcessor,
 100     YoutubeDLHandler,
 101     YoutubeDLRedirectHandler,
 102     process_communicate_or_kill,
 103 )
 104 from .cache import Cache
 105 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
 106 from .extractor.openload import PhantomJSwrapper
 107 from .downloader import get_suitable_downloader
 108 from .downloader.rtmp import rtmpdump_version
 109 from .postprocessor import (
 110     FFmpegFixupM3u8PP,
 111     FFmpegFixupM4aPP,
 112     FFmpegFixupStretchedPP,
 113     FFmpegMergerPP,
 114     FFmpegPostProcessor,
 115     FFmpegSubtitlesConvertorPP,
 116     get_postprocessor,
 117 )
 118 from .version import __version__
 119
 120 if compat_os_name == 'nt':
 121     import ctypes
 122
 123
 124 class YoutubeDL(object):
 125     """YoutubeDL class.
 126
  127     YoutubeDL objects are the ones responsible for downloading the
 128     actual video file and writing it to disk if the user has requested
 129     it, among some other tasks. In most cases there should be one per
 130     program. As, given a video URL, the downloader doesn't know how to
 131     extract all the needed information, task that InfoExtractors do, it
 132     has to pass the URL to one of them.
 133
 134     For this, YoutubeDL objects have a method that allows
 135     InfoExtractors to be registered in a given order. When it is passed
  136     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 137     finds that reports being able to handle it. The InfoExtractor extracts
 138     all the information about the video or videos the URL refers to, and
  139     YoutubeDL processes the extracted information, possibly using a File
 140     Downloader to download the video.
 141
 142     YoutubeDL objects accept a lot of parameters. In order not to saturate
 143     the object constructor with arguments, it receives a dictionary of
 144     options instead. These options are available through the params
 145     attribute for the InfoExtractors to use. The YoutubeDL also
 146     registers itself as the downloader in charge for the InfoExtractors
 147     that are added to it, so this is a "mutual registration".
 148
 149     Available options:
 150
 151     username:          Username for authentication purposes.
 152     password:          Password for authentication purposes.
 153     videopassword:     Password for accessing a video.
 154     ap_mso:            Adobe Pass multiple-system operator identifier.
 155     ap_username:       Multiple-system operator account username.
 156     ap_password:       Multiple-system operator account password.
 157     usenetrc:          Use netrc for authentication instead.
 158     verbose:           Print additional info to stdout.
 159     quiet:             Do not print messages to stdout.
 160     no_warnings:       Do not print out anything for warnings.
 161     forceurl:          Force printing final URL.
 162     forcetitle:        Force printing title.
 163     forceid:           Force printing ID.
 164     forcethumbnail:    Force printing thumbnail URL.
 165     forcedescription:  Force printing description.
 166     forcefilename:     Force printing final filename.
 167     forceduration:     Force printing duration.
 168     forcejson:         Force printing info_dict as JSON.
 169     dump_single_json:  Force printing the info_dict of the whole playlist
 170                        (or video) as a single JSON line.
 171     force_write_download_archive: Force writing download archive regardless of
 172                        'skip_download' or 'simulate'.
 173     simulate:          Do not download the video files.
 174     format:            Video format code. see "FORMAT SELECTION" for more details.
 175     format_sort:       How to sort the video formats. see "Sorting Formats" for more details.
 176     format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
 177     allow_multiple_video_streams:   Allow multiple video streams to be merged into a single file
 178     allow_multiple_audio_streams:   Allow multiple audio streams to be merged into a single file
 179     outtmpl:           Template for output names.
 180     restrictfilenames: Do not allow "&" and spaces in file names.
 181     trim_file_name:    Limit length of filename (extension excluded).
 182     ignoreerrors:      Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
 183     force_generic_extractor: Force downloader to use the generic extractor
 184     nooverwrites:      Prevent overwriting files.
 185     playliststart:     Playlist item to start at.
 186     playlistend:       Playlist item to end at.
 187     playlist_items:    Specific indices of playlist to download.
 188     playlistreverse:   Download playlist items in reverse order.
 189     playlistrandom:    Download playlist items in random order.
 190     matchtitle:        Download only matching titles.
 191     rejecttitle:       Reject downloads for matching titles.
 192     logger:            Log messages to a logging.Logger instance.
 193     logtostderr:       Log messages to stderr instead of stdout.
 194     writedescription:  Write the video description to a .description file
  195     writeinfojson:     Write the video metadata to a .info.json file
 196     writeannotations:  Write the video annotations to a .annotations.xml file
 197     writethumbnail:    Write the thumbnail image to a file
 198     write_all_thumbnails:  Write all thumbnail formats to files
 199     writelink:         Write an internet shortcut file, depending on the
 200                        current platform (.url/.webloc/.desktop)
 201     writeurllink:      Write a Windows internet shortcut file (.url)
 202     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 203     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 204     writesubtitles:    Write the video subtitles to a file
 205     writeautomaticsub: Write the automatically generated subtitles to a file
 206     allsubtitles:      Downloads all the subtitles of the video
 207                        (requires writesubtitles or writeautomaticsub)
 208     listsubtitles:     Lists all available subtitles for the video
 209     subtitlesformat:   The format code for subtitles
 210     subtitleslangs:    List of languages of the subtitles to download
 211     keepvideo:         Keep the video file after post-processing
 212     daterange:         A DateRange object, download only if the upload_date is in the range.
 213     skip_download:     Skip the actual download of the video file
 214     cachedir:          Location of the cache files in the filesystem.
 215                        False to disable filesystem cache.
 216     noplaylist:        Download single video instead of a playlist if in doubt.
 217     age_limit:         An integer representing the user's age in years.
 218                        Unsuitable videos for the given age are skipped.
 219     min_views:         An integer representing the minimum view count the video
 220                        must have in order to not be skipped.
 221                        Videos without view count information are always
 222                        downloaded. None for no limit.
 223     max_views:         An integer representing the maximum view count.
 224                        Videos that are more popular than that are not
 225                        downloaded.
 226                        Videos without view count information are always
 227                        downloaded. None for no limit.
 228     download_archive:  File name of a file where all downloads are recorded.
 229                        Videos already present in the file are not downloaded
 230                        again.
 231     break_on_existing: Stop the download process after attempting to download a file that's
 232                        in the archive.
 233     cookiefile:        File name where cookies should be read from and dumped to.
 234     nocheckcertificate:Do not verify SSL certificates
 235     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 236                        At the moment, this is only supported by YouTube.
 237     proxy:             URL of the proxy server to use
 238     geo_verification_proxy:  URL of the proxy to use for IP address verification
 239                        on geo-restricted sites.
 240     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 241     bidi_workaround:   Work around buggy terminals without bidirectional text
  242                        support, using bidiv or fribidi
 243     debug_printtraffic:Print out sent and received HTTP traffic
 244     include_ads:       Download ads as well
 245     default_search:    Prepend this string if an input url is not valid.
 246                        'auto' for elaborate guessing
 247     encoding:          Use this encoding instead of the system-specified.
 248     extract_flat:      Do not resolve URLs, return the immediate result.
 249                        Pass in 'in_playlist' to only show this behavior for
 250                        playlist items.
 251     postprocessors:    A list of dictionaries, each with an entry
 252                        * key:  The name of the postprocessor. See
 253                                youtube_dlc/postprocessor/__init__.py for a list.
 254                        as well as any further keyword arguments for the
 255                        postprocessor.
 256     post_hooks:        A list of functions that get called as the final step
 257                        for each video file, after all postprocessors have been
 258                        called. The filename will be passed as the only argument.
 259     progress_hooks:    A list of functions that get called on download
 260                        progress, with a dictionary with the entries
 261                        * status: One of "downloading", "error", or "finished".
 262                                  Check this first and ignore unknown values.
 263
 264                        If status is one of "downloading", or "finished", the
 265                        following properties may also be present:
 266                        * filename: The final filename (always present)
 267                        * tmpfilename: The filename we're currently writing to
 268                        * downloaded_bytes: Bytes on disk
 269                        * total_bytes: Size of the whole file, None if unknown
 270                        * total_bytes_estimate: Guess of the eventual file size,
 271                                                None if unavailable.
 272                        * elapsed: The number of seconds since download started.
 273                        * eta: The estimated time in seconds, None if unknown
 274                        * speed: The download speed in bytes/second, None if
 275                                 unknown
 276                        * fragment_index: The counter of the currently
 277                                          downloaded video fragment.
 278                        * fragment_count: The number of fragments (= individual
 279                                          files that will be merged)
 280
 281                        Progress hooks are guaranteed to be called at least once
 282                        (with status "finished") if the download is successful.
 283     merge_output_format: Extension to use when merging formats.
 284     fixup:             Automatically correct known faults of the file.
 285                        One of:
 286                        - "never": do nothing
 287                        - "warn": only emit a warning
 288                        - "detect_or_warn": check whether we can do anything
 289                                            about it, warn otherwise (default)
 290     source_address:    Client-side IP address to bind to.
 291     call_home:         Boolean, true iff we are allowed to contact the
 292                        youtube-dlc servers for debugging.
 293     sleep_interval:    Number of seconds to sleep before each download when
 294                        used alone or a lower bound of a range for randomized
 295                        sleep before each download (minimum possible number
 296                        of seconds to sleep) when used along with
 297                        max_sleep_interval.
 298     max_sleep_interval:Upper bound of a range for randomized sleep before each
 299                        download (maximum possible number of seconds to sleep).
 300                        Must only be used along with sleep_interval.
 301                        Actual sleep time will be a random float from range
 302                        [sleep_interval; max_sleep_interval].
 303     listformats:       Print an overview of available video formats and exit.
 304     list_thumbnails:   Print a table of all thumbnails and exit.
 305     match_filter:      A function that gets called with the info_dict of
 306                        every video.
 307                        If it returns a message, the video is ignored.
 308                        If it returns None, the video is downloaded.
 309                        match_filter_func in utils.py is one example for this.
 310     no_color:          Do not emit color codes in output.
 311     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 312                        HTTP header
 313     geo_bypass_country:
 314                        Two-letter ISO 3166-2 country code that will be used for
 315                        explicit geographic restriction bypassing via faking
 316                        X-Forwarded-For HTTP header
 317     geo_bypass_ip_block:
 318                        IP range in CIDR notation that will be used similarly to
 319                        geo_bypass_country
 320
 321     The following options determine which downloader is picked:
 322     external_downloader: Executable of the external downloader to call.
 323                        None or unset for standard (built-in) downloader.
 324     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
 325                        if True, otherwise use ffmpeg/avconv if False, otherwise
 326                        use downloader suggested by extractor if None.
 327
 328     The following parameters are not used by YoutubeDL itself, they are used by
 329     the downloader (see youtube_dlc/downloader/common.py):
 330     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 331     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 332     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 333     http_chunk_size.
 334
 335     The following options are used by the post processors:
 336     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 337                        otherwise prefer ffmpeg.
 338     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 339                        to the binary or its containing directory.
 340     postprocessor_args: A dictionary of postprocessor names (in lower case) and a list
 341                         of additional command-line arguments for the postprocessor.
  342                         Use 'default' as the name for arguments to be passed to all PPs.
 343
 344     The following options are used by the Youtube extractor:
 345     youtube_include_dash_manifest: If True (default), DASH manifests and related
 346                         data will be downloaded and processed by extractor.
 347                         You can reduce network I/O by disabling it if you don't
 348                         care about DASH.
 349     """
 350
    # Names of info-dict fields that hold numeric values.
    # NOTE(review): presumably consulted when filling in the output template;
    # the code that reads this set is outside this excerpt -- confirm there.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level placeholders. __init__ rebinds every one of these on the
    # instance (fresh lists, zeroed counters, the chosen output stream and the
    # merged options dict), so the mutable lists below are not shared between
    # instances that were created through __init__.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 368
 369     def __init__(self, params=None, auto_init=True):
 370         """Create a FileDownloader object with the given options."""
 371         if params is None:
 372             params = {}
 373         self._ies = []
 374         self._ies_instances = {}
 375         self._pps = []
 376         self._post_hooks = []
 377         self._progress_hooks = []
 378         self._download_retcode = 0
 379         self._num_downloads = 0
 380         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 381         self._err_file = sys.stderr
 382         self.params = {
 383             # Default parameters
 384             'nocheckcertificate': False,
 385         }
 386         self.params.update(params)
 387         self.cache = Cache(self)
 388         self.archive = set()
 389
 390         """Preload the archive, if any is specified"""
 391         def preload_download_archive(self):
 392             fn = self.params.get('download_archive')
 393             if fn is None:
 394                 return False
 395             try:
 396                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 397                     for line in archive_file:
 398                         self.archive.add(line.strip())
 399             except IOError as ioe:
 400                 if ioe.errno != errno.ENOENT:
 401                     raise
 402                 return False
 403             return True
 404
 405         def check_deprecated(param, option, suggestion):
 406             if self.params.get(param) is not None:
 407                 self.report_warning(
 408                     '%s is deprecated. Use %s instead.' % (option, suggestion))
 409                 return True
 410             return False
 411
 412         if self.params.get('verbose'):
 413             self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
 414
 415         preload_download_archive(self)
 416
 417         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 418             if self.params.get('geo_verification_proxy') is None:
 419                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 420
 421         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
 422         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 423         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 424
 425         if params.get('bidi_workaround', False):
 426             try:
 427                 import pty
 428                 master, slave = pty.openpty()
 429                 width = compat_get_terminal_size().columns
 430                 if width is None:
 431                     width_args = []
 432                 else:
 433                     width_args = ['-w', str(width)]
 434                 sp_kwargs = dict(
 435                     stdin=subprocess.PIPE,
 436                     stdout=slave,
 437                     stderr=self._err_file)
 438                 try:
 439                     self._output_process = subprocess.Popen(
 440                         ['bidiv'] + width_args, **sp_kwargs
 441                     )
 442                 except OSError:
 443                     self._output_process = subprocess.Popen(
 444                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 445                 self._output_channel = os.fdopen(master, 'rb')
 446             except OSError as ose:
 447                 if ose.errno == errno.ENOENT:
 448                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 449                 else:
 450                     raise
 451
 452         if (sys.platform != 'win32'
 453                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 454                 and not params.get('restrictfilenames', False)):
 455             # Unicode filesystem API will throw errors (#1474, #13027)
 456             self.report_warning(
 457                 'Assuming --restrict-filenames since file system encoding '
 458                 'cannot encode all characters. '
 459                 'Set the LC_ALL environment variable to fix this.')
 460             self.params['restrictfilenames'] = True
 461
 462         if isinstance(params.get('outtmpl'), bytes):
 463             self.report_warning(
 464                 'Parameter outtmpl is bytes, but should be a unicode string. '
 465                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 466
 467         self._setup_opener()
 468
 469         if auto_init:
 470             self.print_debug_header()
 471             self.add_default_info_extractors()
 472
 473         for pp_def_raw in self.params.get('postprocessors', []):
 474             pp_class = get_postprocessor(pp_def_raw['key'])
 475             pp_def = dict(pp_def_raw)
 476             del pp_def['key']
 477             pp = pp_class(self, **compat_kwargs(pp_def))
 478             self.add_post_processor(pp)
 479
 480         for ph in self.params.get('post_hooks', []):
 481             self.add_post_hook(ph)
 482
 483         for ph in self.params.get('progress_hooks', []):
 484             self.add_progress_hook(ph)
 485
 486         register_socks_protocols()
 487
 488     def warn_if_short_id(self, argv):
 489         # short YouTube ID starting with dash?
 490         idxs = [
 491             i for i, a in enumerate(argv)
 492             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 493         if idxs:
 494             correct_argv = (
 495                 ['youtube-dlc']
 496                 + [a for i, a in enumerate(argv) if i not in idxs]
 497                 + ['--'] + [argv[i] for i in idxs]
 498             )
 499             self.report_warning(
 500                 'Long argument string detected. '
 501                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 502                 args_to_str(correct_argv))
 503
 504     def add_info_extractor(self, ie):
 505         """Add an InfoExtractor object to the end of the list."""
 506         self._ies.append(ie)
 507         if not isinstance(ie, type):
 508             self._ies_instances[ie.ie_key()] = ie
 509             ie.set_downloader(self)
 510
 511     def get_info_extractor(self, ie_key):
 512         """
 513         Get an instance of an IE with name ie_key, it will try to get one from
 514         the _ies list, if there's no instance it will create a new one and add
 515         it to the extractor list.
 516         """
 517         ie = self._ies_instances.get(ie_key)
 518         if ie is None:
 519             ie = get_info_extractor(ie_key)()
 520             self.add_info_extractor(ie)
 521         return ie
 522
 523     def add_default_info_extractors(self):
 524         """
 525         Add the InfoExtractors returned by gen_extractors to the end of the list
 526         """
 527         for ie in gen_extractor_classes():
 528             self.add_info_extractor(ie)
 529
 530     def add_post_processor(self, pp):
 531         """Add a PostProcessor object to the end of the chain."""
 532         self._pps.append(pp)
 533         pp.set_downloader(self)
 534
 535     def add_post_hook(self, ph):
 536         """Add the post hook"""
 537         self._post_hooks.append(ph)
 538
 539     def add_progress_hook(self, ph):
 540         """Add the progress hook (currently only for the file downloader)"""
 541         self._progress_hooks.append(ph)
 542
 543     def _bidi_workaround(self, message):
 544         if not hasattr(self, '_output_channel'):
 545             return message
 546
 547         assert hasattr(self, '_output_process')
 548         assert isinstance(message, compat_str)
 549         line_count = message.count('\n') + 1
 550         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 551         self._output_process.stdin.flush()
 552         res = ''.join(self._output_channel.readline().decode('utf-8')
 553                       for _ in range(line_count))
 554         return res[:-len('\n')]
 555
 556     def to_screen(self, message, skip_eol=False):
 557         """Print message to stdout if not in quiet mode."""
 558         return self.to_stdout(message, skip_eol, check_quiet=True)
 559
 560     def _write_string(self, s, out=None):
 561         write_string(s, out=out, encoding=self.params.get('encoding'))
 562
 563     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 564         """Print message to stdout if not in quiet mode."""
 565         if self.params.get('logger'):
 566             self.params['logger'].debug(message)
 567         elif not check_quiet or not self.params.get('quiet', False):
 568             message = self._bidi_workaround(message)
 569             terminator = ['\n', ''][skip_eol]
 570             output = message + terminator
 571
 572             self._write_string(output, self._screen_file)
 573
 574     def to_stderr(self, message):
 575         """Print message to stderr."""
 576         assert isinstance(message, compat_str)
 577         if self.params.get('logger'):
 578             self.params['logger'].error(message)
 579         else:
 580             message = self._bidi_workaround(message)
 581             output = message + '\n'
 582             self._write_string(output, self._err_file)
 583
 584     def to_console_title(self, message):
 585         if not self.params.get('consoletitle', False):
 586             return
 587         if compat_os_name == 'nt':
 588             if ctypes.windll.kernel32.GetConsoleWindow():
 589                 # c_wchar_p() might not be necessary if `message` is
 590                 # already of type unicode()
 591                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 592         elif 'TERM' in os.environ:
 593             self._write_string('\033[0;%s\007' % message, self._screen_file)
 594
 595     def save_console_title(self):
 596         if not self.params.get('consoletitle', False):
 597             return
 598         if self.params.get('simulate', False):
 599             return
 600         if compat_os_name != 'nt' and 'TERM' in os.environ:
 601             # Save the title on stack
 602             self._write_string('\033[22;0t', self._screen_file)
 603
 604     def restore_console_title(self):
 605         if not self.params.get('consoletitle', False):
 606             return
 607         if self.params.get('simulate', False):
 608             return
 609         if compat_os_name != 'nt' and 'TERM' in os.environ:
 610             # Restore the title from stack
 611             self._write_string('\033[23;0t', self._screen_file)
 612
 613     def __enter__(self):
 614         self.save_console_title()
 615         return self
 616
 617     def __exit__(self, *args):
 618         self.restore_console_title()
 619
 620         if self.params.get('cookiefile') is not None:
 621             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 622
 623     def trouble(self, message=None, tb=None):
 624         """Determine action to take when a download problem appears.
 625
 626         Depending on if the downloader has been configured to ignore
 627         download errors or not, this method may throw an exception or
 628         not when errors are found, after printing the message.
 629
 630         tb, if given, is additional traceback information.
 631         """
 632         if message is not None:
 633             self.to_stderr(message)
 634         if self.params.get('verbose'):
 635             if tb is None:
 636                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 637                     tb = ''
 638                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 639                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 640                     tb += encode_compat_str(traceback.format_exc())
 641                 else:
 642                     tb_data = traceback.format_list(traceback.extract_stack())
 643                     tb = ''.join(tb_data)
 644             self.to_stderr(tb)
 645         if not self.params.get('ignoreerrors', False):
 646             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 647                 exc_info = sys.exc_info()[1].exc_info
 648             else:
 649                 exc_info = sys.exc_info()
 650             raise DownloadError(message, exc_info)
 651         self._download_retcode = 1
 652
 653     def report_warning(self, message):
 654         '''
 655         Print the message to stderr, it will be prefixed with 'WARNING:'
 656         If stderr is a tty file the 'WARNING:' will be colored
 657         '''
 658         if self.params.get('logger') is not None:
 659             self.params['logger'].warning(message)
 660         else:
 661             if self.params.get('no_warnings'):
 662                 return
 663             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 664                 _msg_header = '\033[0;33mWARNING:\033[0m'
 665             else:
 666                 _msg_header = 'WARNING:'
 667             warning_message = '%s %s' % (_msg_header, message)
 668             self.to_stderr(warning_message)
 669
 670     def report_error(self, message, tb=None):
 671         '''
 672         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 673         in red if stderr is a tty file.
 674         '''
 675         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 676             _msg_header = '\033[0;31mERROR:\033[0m'
 677         else:
 678             _msg_header = 'ERROR:'
 679         error_message = '%s %s' % (_msg_header, message)
 680         self.trouble(error_message, tb)
 681
 682     def report_file_already_downloaded(self, file_name):
 683         """Report file has already been fully downloaded."""
 684         try:
 685             self.to_screen('[download] %s has already been downloaded' % file_name)
 686         except UnicodeEncodeError:
 687             self.to_screen('[download] The file has already been downloaded')
 688
 689     def prepare_filename(self, info_dict):
 690         """Generate the output filename."""
 691         try:
 692             template_dict = dict(info_dict)
 693
 694             template_dict['epoch'] = int(time.time())
 695             autonumber_size = self.params.get('autonumber_size')
 696             if autonumber_size is None:
 697                 autonumber_size = 5
 698             template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 699             if template_dict.get('resolution') is None:
 700                 if template_dict.get('width') and template_dict.get('height'):
 701                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 702                 elif template_dict.get('height'):
 703                     template_dict['resolution'] = '%sp' % template_dict['height']
 704                 elif template_dict.get('width'):
 705                     template_dict['resolution'] = '%dx?' % template_dict['width']
 706
 707             sanitize = lambda k, v: sanitize_filename(
 708                 compat_str(v),
 709                 restricted=self.params.get('restrictfilenames'),
 710                 is_id=(k == 'id' or k.endswith('_id')))
 711             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
 712                                  for k, v in template_dict.items()
 713                                  if v is not None and not isinstance(v, (list, tuple, dict)))
 714             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 715
 716             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 717
 718             # For fields playlist_index and autonumber convert all occurrences
 719             # of %(field)s to %(field)0Nd for backward compatibility
 720             field_size_compat_map = {
 721                 'playlist_index': len(str(template_dict['n_entries'])),
 722                 'autonumber': autonumber_size,
 723             }
 724             FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
 725             mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
 726             if mobj:
 727                 outtmpl = re.sub(
 728                     FIELD_SIZE_COMPAT_RE,
 729                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
 730                     outtmpl)
 731
 732             # Missing numeric fields used together with integer presentation types
 733             # in format specification will break the argument substitution since
 734             # string 'NA' is returned for missing fields. We will patch output
 735             # template for missing fields to meet string presentation type.
 736             for numeric_field in self._NUMERIC_FIELDS:
 737                 if numeric_field not in template_dict:
 738                     # As of [1] format syntax is:
 739                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
 740                     # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
 741                     FORMAT_RE = r'''(?x)
 742                         (?<!%)
 743                         %
 744                         \({0}\)  # mapping key
 745                         (?:[#0\-+ ]+)?  # conversion flags (optional)
 746                         (?:\d+)?  # minimum field width (optional)
 747                         (?:\.\d+)?  # precision (optional)
 748                         [hlL]?  # length modifier (optional)
 749                         [diouxXeEfFgGcrs%]  # conversion type
 750                     '''
 751                     outtmpl = re.sub(
 752                         FORMAT_RE.format(numeric_field),
 753                         r'%({0})s'.format(numeric_field), outtmpl)
 754
 755             # expand_path translates '%%' into '%' and '$$' into '$'
 756             # correspondingly that is not what we want since we need to keep
 757             # '%%' intact for template dict substitution step. Working around
 758             # with boundary-alike separator hack.
 759             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 760             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 761
 762             # outtmpl should be expand_path'ed before template dict substitution
 763             # because meta fields may contain env variables we don't want to
 764             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 765             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 766             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 767
 768             # https://github.com/blackjack4494/youtube-dlc/issues/85
 769             trim_file_name = self.params.get('trim_file_name', False)
 770             if trim_file_name:
 771                 fn_groups = filename.rsplit('.')
 772                 ext = fn_groups[-1]
 773                 sub_ext = ''
 774                 if len(fn_groups) > 2:
 775                     sub_ext = fn_groups[-2]
 776                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
 777
 778             # Temporary fix for #4787
 779             # 'Treat' all problem characters by passing filename through preferredencoding
 780             # to workaround encoding issues with subprocess on python2 @ Windows
 781             if sys.version_info < (3, 0) and sys.platform == 'win32':
 782                 filename = encodeFilename(filename, True).decode(preferredencoding())
 783             return sanitize_path(filename)
 784         except ValueError as err:
 785             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 786             return None
 787
 788     def _match_entry(self, info_dict, incomplete):
 789         """ Returns None if the file should be downloaded """
 790
 791         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 792         if 'title' in info_dict:
 793             # This can happen when we're just evaluating the playlist
 794             title = info_dict['title']
 795             matchtitle = self.params.get('matchtitle', False)
 796             if matchtitle:
 797                 if not re.search(matchtitle, title, re.IGNORECASE):
 798                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 799             rejecttitle = self.params.get('rejecttitle', False)
 800             if rejecttitle:
 801                 if re.search(rejecttitle, title, re.IGNORECASE):
 802                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 803         date = info_dict.get('upload_date')
 804         if date is not None:
 805             dateRange = self.params.get('daterange', DateRange())
 806             if date not in dateRange:
 807                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 808         view_count = info_dict.get('view_count')
 809         if view_count is not None:
 810             min_views = self.params.get('min_views')
 811             if min_views is not None and view_count < min_views:
 812                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 813             max_views = self.params.get('max_views')
 814             if max_views is not None and view_count > max_views:
 815                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 816         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 817             return 'Skipping "%s" because it is age restricted' % video_title
 818         if self.in_download_archive(info_dict):
 819             return '%s has already been recorded in archive' % video_title
 820
 821         if not incomplete:
 822             match_filter = self.params.get('match_filter')
 823             if match_filter is not None:
 824                 ret = match_filter(info_dict)
 825                 if ret is not None:
 826                     return ret
 827
 828         return None
 829
 830     @staticmethod
 831     def add_extra_info(info_dict, extra_info):
 832         '''Set the keys from extra_info in info dict if they are missing'''
 833         for key, value in extra_info.items():
 834             info_dict.setdefault(key, value)
 835
 836     def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
 837                      process=True, force_generic_extractor=False):
 838         '''
 839         Returns a list with a dictionary for each video we find.
 840         If 'download', also downloads the videos.
 841         extra_info is a dict containing the extra values to add to each result
 842         '''
 843
 844         if not ie_key and force_generic_extractor:
 845             ie_key = 'Generic'
 846
 847         if ie_key:
 848             ies = [self.get_info_extractor(ie_key)]
 849         else:
 850             ies = self._ies
 851
 852         for ie in ies:
 853             if not ie.suitable(url):
 854                 continue
 855
 856             ie_key = ie.ie_key()
 857             ie = self.get_info_extractor(ie_key)
 858             if not ie.working():
 859                 self.report_warning('The program functionality for this site has been marked as broken, '
 860                                     'and will probably not work.')
 861
 862             try:
 863                 temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
 864             except (AssertionError, IndexError, AttributeError):
 865                 temp_id = None
 866             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
 867                 self.to_screen("[%s] %s: has already been recorded in archive" % (
 868                                ie_key, temp_id))
 869                 break
 870
 871             return self.__extract_info(url, ie, download, extra_info, process, info_dict)
 872
 873         else:
 874             self.report_error('no suitable InfoExtractor for URL %s' % url)
 875
 876     def __handle_extraction_exceptions(func):
 877         def wrapper(self, *args, **kwargs):
 878             try:
 879                 return func(self, *args, **kwargs)
 880             except GeoRestrictedError as e:
 881                 msg = e.msg
 882                 if e.countries:
 883                     msg += '\nThis video is available in %s.' % ', '.join(
 884                         map(ISO3166Utils.short2full, e.countries))
 885                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
 886                 self.report_error(msg)
 887             except ExtractorError as e:  # An error we somewhat expected
 888                 self.report_error(compat_str(e), e.format_traceback())
 889             except MaxDownloadsReached:
 890                 raise
 891             except Exception as e:
 892                 if self.params.get('ignoreerrors', False):
 893                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
 894                 else:
 895                     raise
 896         return wrapper
 897
 898     @__handle_extraction_exceptions
 899     def __extract_info(self, url, ie, download, extra_info, process, info_dict):
 900         ie_result = ie.extract(url)
 901         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 902             return
 903         if isinstance(ie_result, list):
 904             # Backwards compatibility: old IE result format
 905             ie_result = {
 906                 '_type': 'compat_list',
 907                 'entries': ie_result,
 908             }
 909         if info_dict:
 910             if info_dict.get('id'):
 911                 ie_result['id'] = info_dict['id']
 912             if info_dict.get('title'):
 913                 ie_result['title'] = info_dict['title']
 914         self.add_default_extra_info(ie_result, ie, url)
 915         if process:
 916             return self.process_ie_result(ie_result, download, extra_info)
 917         else:
 918             return ie_result
 919
 920     def add_default_extra_info(self, ie_result, ie, url):
 921         self.add_extra_info(ie_result, {
 922             'extractor': ie.IE_NAME,
 923             'webpage_url': url,
 924             'duration_string': (
 925                 formatSeconds(ie_result['duration'], '-')
 926                 if ie_result.get('duration', None) is not None
 927                 else None),
 928             'webpage_url_basename': url_basename(url),
 929             'extractor_key': ie.ie_key(),
 930         })
 931
 932     def process_ie_result(self, ie_result, download=True, extra_info={}):
 933         """
 934         Take the result of the ie(may be modified) and resolve all unresolved
 935         references (URLs, playlist items).
 936
 937         It will also download the videos if 'download'.
 938         Returns the resolved ie_result.
 939         """
 940         result_type = ie_result.get('_type', 'video')
 941
 942         if result_type in ('url', 'url_transparent'):
 943             ie_result['url'] = sanitize_url(ie_result['url'])
 944             extract_flat = self.params.get('extract_flat', False)
 945             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
 946                     or extract_flat is True):
 947                 self.__forced_printings(
 948                     ie_result, self.prepare_filename(ie_result),
 949                     incomplete=True)
 950                 return ie_result
 951
 952         if result_type == 'video':
 953             self.add_extra_info(ie_result, extra_info)
 954             return self.process_video_result(ie_result, download=download)
 955         elif result_type == 'url':
 956             # We have to add extra_info to the results because it may be
 957             # contained in a playlist
 958             return self.extract_info(ie_result['url'],
 959                                      download, info_dict=ie_result,
 960                                      ie_key=ie_result.get('ie_key'),
 961                                      extra_info=extra_info)
 962         elif result_type == 'url_transparent':
 963             # Use the information from the embedding page
 964             info = self.extract_info(
 965                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 966                 extra_info=extra_info, download=False, process=False)
 967
 968             # extract_info may return None when ignoreerrors is enabled and
 969             # extraction failed with an error, don't crash and return early
 970             # in this case
 971             if not info:
 972                 return info
 973
 974             force_properties = dict(
 975                 (k, v) for k, v in ie_result.items() if v is not None)
 976             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
 977                 if f in force_properties:
 978                     del force_properties[f]
 979             new_result = info.copy()
 980             new_result.update(force_properties)
 981
 982             # Extracted info may not be a video result (i.e.
 983             # info.get('_type', 'video') != video) but rather an url or
 984             # url_transparent. In such cases outer metadata (from ie_result)
 985             # should be propagated to inner one (info). For this to happen
 986             # _type of info should be overridden with url_transparent. This
 987             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
 988             if new_result.get('_type') == 'url':
 989                 new_result['_type'] = 'url_transparent'
 990
 991             return self.process_ie_result(
 992                 new_result, download=download, extra_info=extra_info)
 993         elif result_type in ('playlist', 'multi_video'):
 994             # We process each entry in the playlist
 995             playlist = ie_result.get('title') or ie_result.get('id')
 996             self.to_screen('[download] Downloading playlist: %s' % playlist)
 997
 998             playlist_results = []
 999
1000             playliststart = self.params.get('playliststart', 1) - 1
1001             playlistend = self.params.get('playlistend')
1002             # For backwards compatibility, interpret -1 as whole list
1003             if playlistend == -1:
1004                 playlistend = None
1005
1006             playlistitems_str = self.params.get('playlist_items')
1007             playlistitems = None
1008             if playlistitems_str is not None:
1009                 def iter_playlistitems(format):
1010                     for string_segment in format.split(','):
1011                         if '-' in string_segment:
1012                             start, end = string_segment.split('-')
1013                             for item in range(int(start), int(end) + 1):
1014                                 yield int(item)
1015                         else:
1016                             yield int(string_segment)
1017                 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1018
1019             ie_entries = ie_result['entries']
1020
1021             def make_playlistitems_entries(list_ie_entries):
1022                 num_entries = len(list_ie_entries)
1023                 return [
1024                     list_ie_entries[i - 1] for i in playlistitems
1025                     if -num_entries <= i - 1 < num_entries]
1026
1027             def report_download(num_entries):
1028                 self.to_screen(
1029                     '[%s] playlist %s: Downloading %d videos' %
1030                     (ie_result['extractor'], playlist, num_entries))
1031
1032             if isinstance(ie_entries, list):
1033                 n_all_entries = len(ie_entries)
1034                 if playlistitems:
1035                     entries = make_playlistitems_entries(ie_entries)
1036                 else:
1037                     entries = ie_entries[playliststart:playlistend]
1038                 n_entries = len(entries)
1039                 self.to_screen(
1040                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
1041                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
1042             elif isinstance(ie_entries, PagedList):
1043                 if playlistitems:
1044                     entries = []
1045                     for item in playlistitems:
1046                         entries.extend(ie_entries.getslice(
1047                             item - 1, item
1048                         ))
1049                 else:
1050                     entries = ie_entries.getslice(
1051                         playliststart, playlistend)
1052                 n_entries = len(entries)
1053                 report_download(n_entries)
1054             else:  # iterable
1055                 if playlistitems:
1056                     entries = make_playlistitems_entries(list(itertools.islice(
1057                         ie_entries, 0, max(playlistitems))))
1058                 else:
1059                     entries = list(itertools.islice(
1060                         ie_entries, playliststart, playlistend))
1061                 n_entries = len(entries)
1062                 report_download(n_entries)
1063
1064             if self.params.get('playlistreverse', False):
1065                 entries = entries[::-1]
1066
1067             if self.params.get('playlistrandom', False):
1068                 random.shuffle(entries)
1069
1070             x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1071
1072             for i, entry in enumerate(entries, 1):
1073                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1074                 # This __x_forwarded_for_ip thing is a bit ugly but requires
1075                 # minimal changes
1076                 if x_forwarded_for:
1077                     entry['__x_forwarded_for_ip'] = x_forwarded_for
1078                 extra = {
1079                     'n_entries': n_entries,
1080                     'playlist': playlist,
1081                     'playlist_id': ie_result.get('id'),
1082                     'playlist_title': ie_result.get('title'),
1083                     'playlist_uploader': ie_result.get('uploader'),
1084                     'playlist_uploader_id': ie_result.get('uploader_id'),
1085                     'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
1086                     'extractor': ie_result['extractor'],
1087                     'webpage_url': ie_result['webpage_url'],
1088                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1089                     'extractor_key': ie_result['extractor_key'],
1090                 }
1091
1092                 reason = self._match_entry(entry, incomplete=True)
1093                 if reason is not None:
1094                     if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
1095                         print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
1096                         break
1097                     else:
1098                         self.to_screen('[download] ' + reason)
1099                         continue
1100
1101                 entry_result = self.__process_iterable_entry(entry, download, extra)
1102                 # TODO: skip failed (empty) entries?
1103                 playlist_results.append(entry_result)
1104             ie_result['entries'] = playlist_results
1105             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1106             return ie_result
1107         elif result_type == 'compat_list':
1108             self.report_warning(
1109                 'Extractor %s returned a compat_list result. '
1110                 'It needs to be updated.' % ie_result.get('extractor'))
1111
1112             def _fixup(r):
1113                 self.add_extra_info(
1114                     r,
1115                     {
1116                         'extractor': ie_result['extractor'],
1117                         'webpage_url': ie_result['webpage_url'],
1118                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1119                         'extractor_key': ie_result['extractor_key'],
1120                     }
1121                 )
1122                 return r
1123             ie_result['entries'] = [
1124                 self.process_ie_result(_fixup(r), download, extra_info)
1125                 for r in ie_result['entries']
1126             ]
1127             return ie_result
1128         else:
1129             raise Exception('Invalid result type: %s' % result_type)
1130
1131     @__handle_extraction_exceptions
1132     def __process_iterable_entry(self, entry, download, extra_info):
1133         return self.process_ie_result(
1134             entry, download=download, extra_info=extra_info)
1135
1136     def _build_format_filter(self, filter_spec):
1137         " Returns a function to filter the formats according to the filter_spec "
1138
1139         OPERATORS = {
1140             '<': operator.lt,
1141             '<=': operator.le,
1142             '>': operator.gt,
1143             '>=': operator.ge,
1144             '=': operator.eq,
1145             '!=': operator.ne,
1146         }
1147         operator_rex = re.compile(r'''(?x)\s*
1148             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1149             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1150             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1151             $
1152             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1153         m = operator_rex.search(filter_spec)
1154         if m:
1155             try:
1156                 comparison_value = int(m.group('value'))
1157             except ValueError:
1158                 comparison_value = parse_filesize(m.group('value'))
1159                 if comparison_value is None:
1160                     comparison_value = parse_filesize(m.group('value') + 'B')
1161                 if comparison_value is None:
1162                     raise ValueError(
1163                         'Invalid value %r in format specification %r' % (
1164                             m.group('value'), filter_spec))
1165             op = OPERATORS[m.group('op')]
1166
1167         if not m:
1168             STR_OPERATORS = {
1169                 '=': operator.eq,
1170                 '^=': lambda attr, value: attr.startswith(value),
1171                 '$=': lambda attr, value: attr.endswith(value),
1172                 '*=': lambda attr, value: value in attr,
1173             }
1174             str_operator_rex = re.compile(r'''(?x)
1175                 \s*(?P<key>[a-zA-Z0-9._-]+)
1176                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1177                 \s*(?P<value>[a-zA-Z0-9._-]+)
1178                 \s*$
1179                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1180             m = str_operator_rex.search(filter_spec)
1181             if m:
1182                 comparison_value = m.group('value')
1183                 str_op = STR_OPERATORS[m.group('op')]
1184                 if m.group('negation'):
1185                     op = lambda attr, value: not str_op(attr, value)
1186                 else:
1187                     op = str_op
1188
1189         if not m:
1190             raise ValueError('Invalid filter specification %r' % filter_spec)
1191
1192         def _filter(f):
1193             actual_value = f.get(m.group('key'))
1194             if actual_value is None:
1195                 return m.group('none_inclusive')
1196             return op(actual_value, comparison_value)
1197         return _filter
1198
1199     def _default_format_spec(self, info_dict, download=True):
1200
1201         def can_merge():
1202             merger = FFmpegMergerPP(self)
1203             return merger.available and merger.can_merge()
1204
1205         prefer_best = (
1206             not self.params.get('simulate', False)
1207             and download
1208             and (
1209                 not can_merge()
1210                 or info_dict.get('is_live', False)
1211                 or self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-'))
1212
1213         return (
1214             'best/bestvideo+bestaudio'
1215             if prefer_best
1216             else 'bestvideo*+bestaudio/best'
1217             if not self.params.get('allow_multiple_audio_streams', False)
1218             else 'bestvideo+bestaudio/best')
1219
1220     def build_format_selector(self, format_spec):
1221         def syntax_error(note, start):
1222             message = (
1223                 'Invalid format specification: '
1224                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1225             return SyntaxError(message)
1226
1227         PICKFIRST = 'PICKFIRST'
1228         MERGE = 'MERGE'
1229         SINGLE = 'SINGLE'
1230         GROUP = 'GROUP'
1231         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1232
1233         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1234                                   'video': self.params.get('allow_multiple_video_streams', False)}
1235
1236         def _parse_filter(tokens):
1237             filter_parts = []
1238             for type, string, start, _, _ in tokens:
1239                 if type == tokenize.OP and string == ']':
1240                     return ''.join(filter_parts)
1241                 else:
1242                     filter_parts.append(string)
1243
1244         def _remove_unused_ops(tokens):
1245             # Remove operators that we don't use and join them with the surrounding strings
1246             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1247             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1248             last_string, last_start, last_end, last_line = None, None, None, None
1249             for type, string, start, end, line in tokens:
1250                 if type == tokenize.OP and string == '[':
1251                     if last_string:
1252                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1253                         last_string = None
1254                     yield type, string, start, end, line
1255                     # everything inside brackets will be handled by _parse_filter
1256                     for type, string, start, end, line in tokens:
1257                         yield type, string, start, end, line
1258                         if type == tokenize.OP and string == ']':
1259                             break
1260                 elif type == tokenize.OP and string in ALLOWED_OPS:
1261                     if last_string:
1262                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1263                         last_string = None
1264                     yield type, string, start, end, line
1265                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1266                     if not last_string:
1267                         last_string = string
1268                         last_start = start
1269                         last_end = end
1270                     else:
1271                         last_string += string
1272             if last_string:
1273                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1274
1275         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1276             selectors = []
1277             current_selector = None
1278             for type, string, start, _, _ in tokens:
1279                 # ENCODING is only defined in python 3.x
1280                 if type == getattr(tokenize, 'ENCODING', None):
1281                     continue
1282                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1283                     current_selector = FormatSelector(SINGLE, string, [])
1284                 elif type == tokenize.OP:
1285                     if string == ')':
1286                         if not inside_group:
1287                             # ')' will be handled by the parentheses group
1288                             tokens.restore_last_token()
1289                         break
1290                     elif inside_merge and string in ['/', ',']:
1291                         tokens.restore_last_token()
1292                         break
1293                     elif inside_choice and string == ',':
1294                         tokens.restore_last_token()
1295                         break
1296                     elif string == ',':
1297                         if not current_selector:
1298                             raise syntax_error('"," must follow a format selector', start)
1299                         selectors.append(current_selector)
1300                         current_selector = None
1301                     elif string == '/':
1302                         if not current_selector:
1303                             raise syntax_error('"/" must follow a format selector', start)
1304                         first_choice = current_selector
1305                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1306                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1307                     elif string == '[':
1308                         if not current_selector:
1309                             current_selector = FormatSelector(SINGLE, 'best', [])
1310                         format_filter = _parse_filter(tokens)
1311                         current_selector.filters.append(format_filter)
1312                     elif string == '(':
1313                         if current_selector:
1314                             raise syntax_error('Unexpected "("', start)
1315                         group = _parse_format_selection(tokens, inside_group=True)
1316                         current_selector = FormatSelector(GROUP, group, [])
1317                     elif string == '+':
1318                         if not current_selector:
1319                             raise syntax_error('Unexpected "+"', start)
1320                         selector_1 = current_selector
1321                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1322                         if not selector_2:
1323                             raise syntax_error('Expected a selector', start)
1324                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1325                     else:
1326                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1327                 elif type == tokenize.ENDMARKER:
1328                     break
1329             if current_selector:
1330                 selectors.append(current_selector)
1331             return selectors
1332
1333         def _build_selector_function(selector):
1334             if isinstance(selector, list):  # ,
1335                 fs = [_build_selector_function(s) for s in selector]
1336
1337                 def selector_function(ctx):
1338                     for f in fs:
1339                         for format in f(ctx):
1340                             yield format
1341                 return selector_function
1342
1343             elif selector.type == GROUP:  # ()
1344                 selector_function = _build_selector_function(selector.selector)
1345
1346             elif selector.type == PICKFIRST:  # /
1347                 fs = [_build_selector_function(s) for s in selector.selector]
1348
1349                 def selector_function(ctx):
1350                     for f in fs:
1351                         picked_formats = list(f(ctx))
1352                         if picked_formats:
1353                             return picked_formats
1354                     return []
1355
1356             elif selector.type == SINGLE:  # atom
1357                 format_spec = selector.selector if selector.selector is not None else 'best'
1358
1359                 if format_spec == 'all':
1360                     def selector_function(ctx):
1361                         formats = list(ctx['formats'])
1362                         if formats:
1363                             for f in formats:
1364                                 yield f
1365
1366                 else:
1367                     format_fallback = False
1368                     format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
1369                     if format_spec_obj is not None:
1370                         format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
1371                         format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
1372                         not_format_type = 'v' if format_type == 'a' else 'a'
1373                         format_modified = format_spec_obj.group(3) is not None
1374
1375                         format_fallback = not format_type and not format_modified  # for b, w
1376                         filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
1377                                     if format_type and format_modified  # bv*, ba*, wv*, wa*
1378                                     else (lambda f: f.get(not_format_type + 'codec') == 'none')
1379                                     if format_type  # bv, ba, wv, wa
1380                                     else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1381                                     if not format_modified  # b, w
1382                                     else None)  # b*, w*
1383                     else:
1384                         format_idx = -1
1385                         filter_f = ((lambda f: f.get('ext') == format_spec)
1386                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1387                                     else (lambda f: f.get('format_id') == format_spec))  # id
1388
1389                     def selector_function(ctx):
1390                         formats = list(ctx['formats'])
1391                         if not formats:
1392                             return
1393                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1394                         if matches:
1395                             yield matches[format_idx]
1396                         elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
1397                             # for extractors with incomplete formats (audio only (soundcloud)
1398                             # or video only (imgur)) best/worst will fallback to
1399                             # best/worst {video,audio}-only format
1400                             yield formats[format_idx]
1401
1402             elif selector.type == MERGE:        # +
1403                 def _merge(formats_pair):
1404                     format_1, format_2 = formats_pair
1405
1406                     formats_info = []
1407                     formats_info.extend(format_1.get('requested_formats', (format_1,)))
1408                     formats_info.extend(format_2.get('requested_formats', (format_2,)))
1409
1410                     if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1411                         get_no_more = {"video": False, "audio": False}
1412                         for (i, fmt_info) in enumerate(formats_info):
1413                             for aud_vid in ["audio", "video"]:
1414                                 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1415                                     if get_no_more[aud_vid]:
1416                                         formats_info.pop(i)
1417                                     get_no_more[aud_vid] = True
1418
1419                     if len(formats_info) == 1:
1420                         return formats_info[0]
1421
1422                     video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1423                     audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1424
1425                     the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1426                     the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1427
1428                     output_ext = self.params.get('merge_output_format')
1429                     if not output_ext:
1430                         if the_only_video:
1431                             output_ext = the_only_video['ext']
1432                         elif the_only_audio and not video_fmts:
1433                             output_ext = the_only_audio['ext']
1434                         else:
1435                             output_ext = 'mkv'
1436
1437                     new_dict = {
1438                         'requested_formats': formats_info,
1439                         'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1440                         'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1441                         'ext': output_ext,
1442                     }
1443
1444                     if the_only_video:
1445                         new_dict.update({
1446                             'width': the_only_video.get('width'),
1447                             'height': the_only_video.get('height'),
1448                             'resolution': the_only_video.get('resolution'),
1449                             'fps': the_only_video.get('fps'),
1450                             'vcodec': the_only_video.get('vcodec'),
1451                             'vbr': the_only_video.get('vbr'),
1452                             'stretched_ratio': the_only_video.get('stretched_ratio'),
1453                         })
1454
1455                     if the_only_audio:
1456                         new_dict.update({
1457                             'acodec': the_only_audio.get('acodec'),
1458                             'abr': the_only_audio.get('abr'),
1459                         })
1460
1461                     return new_dict
1462
1463                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1464
1465                 def selector_function(ctx):
1466                     for pair in itertools.product(
1467                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1468                         yield _merge(pair)
1469
1470             filters = [self._build_format_filter(f) for f in selector.filters]
1471
1472             def final_selector(ctx):
1473                 ctx_copy = copy.deepcopy(ctx)
1474                 for _filter in filters:
1475                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1476                 return selector_function(ctx_copy)
1477             return final_selector
1478
1479         stream = io.BytesIO(format_spec.encode('utf-8'))
1480         try:
1481             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1482         except tokenize.TokenError:
1483             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1484
1485         class TokenIterator(object):
1486             def __init__(self, tokens):
1487                 self.tokens = tokens
1488                 self.counter = 0
1489
1490             def __iter__(self):
1491                 return self
1492
1493             def __next__(self):
1494                 if self.counter >= len(self.tokens):
1495                     raise StopIteration()
1496                 value = self.tokens[self.counter]
1497                 self.counter += 1
1498                 return value
1499
1500             next = __next__
1501
1502             def restore_last_token(self):
1503                 self.counter -= 1
1504
1505         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1506         return _build_selector_function(parsed_selector)
1507
1508     def _calc_headers(self, info_dict):
1509         res = std_headers.copy()
1510
1511         add_headers = info_dict.get('http_headers')
1512         if add_headers:
1513             res.update(add_headers)
1514
1515         cookies = self._calc_cookies(info_dict)
1516         if cookies:
1517             res['Cookie'] = cookies
1518
1519         if 'X-Forwarded-For' not in res:
1520             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1521             if x_forwarded_for_ip:
1522                 res['X-Forwarded-For'] = x_forwarded_for_ip
1523
1524         return res
1525
1526     def _calc_cookies(self, info_dict):
1527         pr = sanitized_Request(info_dict['url'])
1528         self.cookiejar.add_cookie_header(pr)
1529         return pr.get_header('Cookie')
1530
1531     def process_video_result(self, info_dict, download=True):
1532         assert info_dict.get('_type', 'video') == 'video'
1533
1534         if 'id' not in info_dict:
1535             raise ExtractorError('Missing "id" field in extractor result')
1536         if 'title' not in info_dict:
1537             raise ExtractorError('Missing "title" field in extractor result')
1538
1539         def report_force_conversion(field, field_not, conversion):
1540             self.report_warning(
1541                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1542                 % (field, field_not, conversion))
1543
1544         def sanitize_string_field(info, string_field):
1545             field = info.get(string_field)
1546             if field is None or isinstance(field, compat_str):
1547                 return
1548             report_force_conversion(string_field, 'a string', 'string')
1549             info[string_field] = compat_str(field)
1550
1551         def sanitize_numeric_fields(info):
1552             for numeric_field in self._NUMERIC_FIELDS:
1553                 field = info.get(numeric_field)
1554                 if field is None or isinstance(field, compat_numeric_types):
1555                     continue
1556                 report_force_conversion(numeric_field, 'numeric', 'int')
1557                 info[numeric_field] = int_or_none(field)
1558
1559         sanitize_string_field(info_dict, 'id')
1560         sanitize_numeric_fields(info_dict)
1561
1562         if 'playlist' not in info_dict:
1563             # It isn't part of a playlist
1564             info_dict['playlist'] = None
1565             info_dict['playlist_index'] = None
1566
1567         thumbnails = info_dict.get('thumbnails')
1568         if thumbnails is None:
1569             thumbnail = info_dict.get('thumbnail')
1570             if thumbnail:
1571                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1572         if thumbnails:
1573             thumbnails.sort(key=lambda t: (
1574                 t.get('preference') if t.get('preference') is not None else -1,
1575                 t.get('width') if t.get('width') is not None else -1,
1576                 t.get('height') if t.get('height') is not None else -1,
1577                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1578             for i, t in enumerate(thumbnails):
1579                 t['url'] = sanitize_url(t['url'])
1580                 if t.get('width') and t.get('height'):
1581                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1582                 if t.get('id') is None:
1583                     t['id'] = '%d' % i
1584
1585         if self.params.get('list_thumbnails'):
1586             self.list_thumbnails(info_dict)
1587             return
1588
1589         thumbnail = info_dict.get('thumbnail')
1590         if thumbnail:
1591             info_dict['thumbnail'] = sanitize_url(thumbnail)
1592         elif thumbnails:
1593             info_dict['thumbnail'] = thumbnails[-1]['url']
1594
1595         if 'display_id' not in info_dict and 'id' in info_dict:
1596             info_dict['display_id'] = info_dict['id']
1597
1598         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1599             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1600             # see http://bugs.python.org/issue1646728)
1601             try:
1602                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1603                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1604             except (ValueError, OverflowError, OSError):
1605                 pass
1606
1607         # Auto generate title fields corresponding to the *_number fields when missing
1608         # in order to always have clean titles. This is very common for TV series.
1609         for field in ('chapter', 'season', 'episode'):
1610             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1611                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1612
1613         for cc_kind in ('subtitles', 'automatic_captions'):
1614             cc = info_dict.get(cc_kind)
1615             if cc:
1616                 for _, subtitle in cc.items():
1617                     for subtitle_format in subtitle:
1618                         if subtitle_format.get('url'):
1619                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1620                         if subtitle_format.get('ext') is None:
1621                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1622
1623         automatic_captions = info_dict.get('automatic_captions')
1624         subtitles = info_dict.get('subtitles')
1625
1626         if self.params.get('listsubtitles', False):
1627             if 'automatic_captions' in info_dict:
1628                 self.list_subtitles(
1629                     info_dict['id'], automatic_captions, 'automatic captions')
1630             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1631             return
1632
1633         info_dict['requested_subtitles'] = self.process_subtitles(
1634             info_dict['id'], subtitles, automatic_captions)
1635
1636         # We now pick which formats have to be downloaded
1637         if info_dict.get('formats') is None:
1638             # There's only one format available
1639             formats = [info_dict]
1640         else:
1641             formats = info_dict['formats']
1642
1643         if not formats:
1644             raise ExtractorError('No video formats found!')
1645
1646         def is_wellformed(f):
1647             url = f.get('url')
1648             if not url:
1649                 self.report_warning(
1650                     '"url" field is missing or empty - skipping format, '
1651                     'there is an error in extractor')
1652                 return False
1653             if isinstance(url, bytes):
1654                 sanitize_string_field(f, 'url')
1655             return True
1656
1657         # Filter out malformed formats for better extraction robustness
1658         formats = list(filter(is_wellformed, formats))
1659
1660         formats_dict = {}
1661
1662         # We check that all the formats have the format and format_id fields
1663         for i, format in enumerate(formats):
1664             sanitize_string_field(format, 'format_id')
1665             sanitize_numeric_fields(format)
1666             format['url'] = sanitize_url(format['url'])
1667             if not format.get('format_id'):
1668                 format['format_id'] = compat_str(i)
1669             else:
1670                 # Sanitize format_id from characters used in format selector expression
1671                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1672             format_id = format['format_id']
1673             if format_id not in formats_dict:
1674                 formats_dict[format_id] = []
1675             formats_dict[format_id].append(format)
1676
1677         # Make sure all formats have unique format_id
1678         for format_id, ambiguous_formats in formats_dict.items():
1679             if len(ambiguous_formats) > 1:
1680                 for i, format in enumerate(ambiguous_formats):
1681                     format['format_id'] = '%s-%d' % (format_id, i)
1682
1683         for i, format in enumerate(formats):
1684             if format.get('format') is None:
1685                 format['format'] = '{id} - {res}{note}'.format(
1686                     id=format['format_id'],
1687                     res=self.format_resolution(format),
1688                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1689                 )
1690             # Automatically determine file extension if missing
1691             if format.get('ext') is None:
1692                 format['ext'] = determine_ext(format['url']).lower()
1693             # Automatically determine protocol if missing (useful for format
1694             # selection purposes)
1695             if format.get('protocol') is None:
1696                 format['protocol'] = determine_protocol(format)
1697             # Add HTTP headers, so that external programs can use them from the
1698             # json output
1699             full_format_info = info_dict.copy()
1700             full_format_info.update(format)
1701             format['http_headers'] = self._calc_headers(full_format_info)
1702         # Remove private housekeeping stuff
1703         if '__x_forwarded_for_ip' in info_dict:
1704             del info_dict['__x_forwarded_for_ip']
1705
1706         # TODO Central sorting goes here
1707
1708         if formats[0] is not info_dict:
1709             # only set the 'formats' fields if the original info_dict list them
1710             # otherwise we end up with a circular reference, the first (and unique)
1711             # element in the 'formats' field in info_dict is info_dict itself,
1712             # which can't be exported to json
1713             info_dict['formats'] = formats
1714         if self.params.get('listformats'):
1715             self.list_formats(info_dict)
1716             return
1717
1718         req_format = self.params.get('format')
1719         if req_format is None:
1720             req_format = self._default_format_spec(info_dict, download=download)
1721             if self.params.get('verbose'):
1722                 self._write_string('[debug] Default format spec: %s\n' % req_format)
1723
1724         format_selector = self.build_format_selector(req_format)
1725
1726         # While in format selection we may need to have an access to the original
1727         # format set in order to calculate some metrics or do some processing.
1728         # For now we need to be able to guess whether original formats provided
1729         # by extractor are incomplete or not (i.e. whether extractor provides only
1730         # video-only or audio-only formats) for proper formats selection for
1731         # extractors with such incomplete formats (see
1732         # https://github.com/ytdl-org/youtube-dl/pull/5556).
1733         # Since formats may be filtered during format selection and may not match
1734         # the original formats the results may be incorrect. Thus original formats
1735         # or pre-calculated metrics should be passed to format selection routines
1736         # as well.
1737         # We will pass a context object containing all necessary additional data
1738         # instead of just formats.
1739         # This fixes incorrect format selection issue (see
1740         # https://github.com/ytdl-org/youtube-dl/issues/10083).
1741         incomplete_formats = (
1742             # All formats are video-only or
1743             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1744             # all formats are audio-only
1745             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1746
1747         ctx = {
1748             'formats': formats,
1749             'incomplete_formats': incomplete_formats,
1750         }
1751
1752         formats_to_download = list(format_selector(ctx))
1753         if not formats_to_download:
1754             raise ExtractorError('requested format not available',
1755                                  expected=True)
1756
1757         if download:
1758             self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
1759             if len(formats_to_download) > 1:
1760                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1761             for format in formats_to_download:
1762                 new_info = dict(info_dict)
1763                 new_info.update(format)
1764                 self.process_info(new_info)
1765         # We update the info dict with the best quality format (backwards compatibility)
1766         info_dict.update(formats_to_download[-1])
1767         return info_dict
1768
1769     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1770         """Select the requested subtitles and their format"""
1771         available_subs = {}
1772         if normal_subtitles and self.params.get('writesubtitles'):
1773             available_subs.update(normal_subtitles)
1774         if automatic_captions and self.params.get('writeautomaticsub'):
1775             for lang, cap_info in automatic_captions.items():
1776                 if lang not in available_subs:
1777                     available_subs[lang] = cap_info
1778
1779         if (not self.params.get('writesubtitles') and not
1780                 self.params.get('writeautomaticsub') or not
1781                 available_subs):
1782             return None
1783
1784         if self.params.get('allsubtitles', False):
1785             requested_langs = available_subs.keys()
1786         else:
1787             if self.params.get('subtitleslangs', False):
1788                 requested_langs = self.params.get('subtitleslangs')
1789             elif 'en' in available_subs:
1790                 requested_langs = ['en']
1791             else:
1792                 requested_langs = [list(available_subs.keys())[0]]
1793
1794         formats_query = self.params.get('subtitlesformat', 'best')
1795         formats_preference = formats_query.split('/') if formats_query else []
1796         subs = {}
1797         for lang in requested_langs:
1798             formats = available_subs.get(lang)
1799             if formats is None:
1800                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1801                 continue
1802             for ext in formats_preference:
1803                 if ext == 'best':
1804                     f = formats[-1]
1805                     break
1806                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1807                 if matches:
1808                     f = matches[-1]
1809                     break
1810             else:
1811                 f = formats[-1]
1812                 self.report_warning(
1813                     'No subtitle format found matching "%s" for language %s, '
1814                     'using %s' % (formats_query, lang, f['ext']))
1815             subs[lang] = f
1816         return subs
1817
1818     def __forced_printings(self, info_dict, filename, incomplete):
1819         def print_mandatory(field):
1820             if (self.params.get('force%s' % field, False)
1821                     and (not incomplete or info_dict.get(field) is not None)):
1822                 self.to_stdout(info_dict[field])
1823
1824         def print_optional(field):
1825             if (self.params.get('force%s' % field, False)
1826                     and info_dict.get(field) is not None):
1827                 self.to_stdout(info_dict[field])
1828
1829         print_mandatory('title')
1830         print_mandatory('id')
1831         if self.params.get('forceurl', False) and not incomplete:
1832             if info_dict.get('requested_formats') is not None:
1833                 for f in info_dict['requested_formats']:
1834                     self.to_stdout(f['url'] + f.get('play_path', ''))
1835             else:
1836                 # For RTMP URLs, also include the playpath
1837                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1838         print_optional('thumbnail')
1839         print_optional('description')
1840         if self.params.get('forcefilename', False) and filename is not None:
1841             self.to_stdout(filename)
1842         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1843             self.to_stdout(formatSeconds(info_dict['duration']))
1844         print_mandatory('format')
1845         if self.params.get('forcejson', False):
1846             self.to_stdout(json.dumps(info_dict))
1847
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        info_dict must describe a single video ('_type' absent or 'video').
        The steps, in order:

        * stop early if the max_downloads limit is already reached or
          _match_entry() gives a reason to skip the entry;
        * prepare the output filename (stored as info_dict['_filename'])
          and emit the forced printings;
        * in simulate mode, optionally record the download archive entry
          and return;
        * write the side files requested through params: description,
          annotations, subtitles, info JSON, thumbnails, internet shortcuts;
        * unless skip_download is set, download the media (every entry of
          'requested_formats' when present), queue the fixup
          postprocessors, then run post_process() and the post hooks;
        * record the download archive entry and re-check max_downloads.

        Most failures are reported through report_error()/report_warning()
        followed by an early return.

        Raises MaxDownloadsReached when the max_downloads limit is hit and
        UnavailableVideoError when the download fails with OSError/IOError.
        """

        assert info_dict.get('_type', 'video') == 'video'

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # TODO: backward compatibility, to be removed
        info_dict['fulltitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # A non-None reason means the entry must be skipped
        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        info_dict['_filename'] = filename = self.prepare_filename(info_dict)

        # Forced printings
        self.__forced_printings(info_dict, filename, incomplete=False)

        if self.params.get('simulate', False):
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_dict)

            # Do nothing else if in simulate mode
            return

        if filename is None:
            return

        def ensure_dir_exists(path):
            # Create the parent directory of path when it does not exist yet.
            # Returns True on success, False (after reporting) on failure.
            try:
                dn = os.path.dirname(path)
                if dn and not os.path.exists(dn):
                    os.makedirs(dn)
                return True
            except (OSError, IOError) as err:
                self.report_error('unable to create directory ' + error_to_compat_str(err))
                return False

        if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
            return

        # Write the video description next to the media file
        if self.params.get('writedescription', False):
            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        # Write the annotations as an XML side file
        if self.params.get('writeannotations', False):
            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            elif not info_dict.get('annotations'):
                self.report_warning('There are no annotations to write.')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        def dl(name, info, subtitle=False):
            # Run the downloader chosen by get_suitable_downloader() for info,
            # with all registered progress hooks attached, and return the
            # result of its download() call.
            fd = get_suitable_downloader(info, self.params)(self, self.params)
            for ph in self._progress_hooks:
                fd.add_progress_hook(ph)
            if self.params.get('verbose'):
                self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
            return fd.download(name, info, subtitle)

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            # ie = self.get_info_extractor(info_dict['extractor_key'])
            for sub_lang, sub_info in subtitles.items():
                sub_format = sub_info['ext']
                sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    if sub_info.get('data') is not None:
                        # The subtitle text is already available: write it out directly
                        try:
                            # Use newline='' to prevent conversion of newline characters
                            # See https://github.com/ytdl-org/youtube-dl/issues/10268
                            with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                                subfile.write(sub_info['data'])
                        except (OSError, IOError):
                            self.report_error('Cannot write subtitles file ' + sub_filename)
                            return
                    else:
                        # No inline data: fetch the subtitle through the downloader.
                        # A failure only produces a warning and moves on to the
                        # next language.
                        try:
                            dl(sub_filename, sub_info, subtitle=True)
                            '''
                            if self.params.get('sleep_interval_subtitles', False):
                                dl(sub_filename, sub_info)
                            else:
                                sub_data = ie._request_webpage(
                                    sub_info['url'], info_dict['id'], note=False).read()
                                with io.open(encodeFilename(sub_filename), 'wb') as subfile:
                                    subfile.write(sub_data)
                            '''
                        except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                            self.report_warning('Unable to download subtitle for "%s": %s' %
                                                (sub_lang, error_to_compat_str(err)))
                            continue

        # With skip_download the main post-processing step further below is
        # never reached, so it is triggered here when subtitle conversion was
        # requested and the converted file does not exist yet.
        if self.params.get('skip_download', False):
            if self.params.get('convertsubtitles', False):
                subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
                filename_real_ext = os.path.splitext(filename)[1][1:]
                filename_wo_ext = (
                    os.path.splitext(filename)[0]
                    if filename_real_ext == info_dict['ext']
                    else filename)
                afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
                if subconv.available:
                    info_dict.setdefault('__postprocessors', [])
                    # info_dict['__postprocessors'].append(subconv)
                if os.path.exists(encodeFilename(afilename)):
                    self.to_screen(
                        '[download] %s has already been downloaded and '
                        'converted' % afilename)
                else:
                    try:
                        self.post_process(filename, info_dict)
                    except (PostProcessingError) as err:
                        self.report_error('postprocessing: %s' % str(err))
                        return

        # Write the metadata (without the requested_* keys) as JSON
        if self.params.get('writeinfojson', False):
            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(info_dict), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        self._write_thumbnails(info_dict, filename)

        # Write internet shortcut files
        url_link = webloc_link = desktop_link = False
        # 'writelink' picks the shortcut type from the current platform; the
        # three specific options below can request further types explicitly.
        if self.params.get('writelink', False):
            if sys.platform == "darwin":  # macOS.
                webloc_link = True
            elif sys.platform.startswith("linux"):
                desktop_link = True
            else:  # if sys.platform in ['win32', 'cygwin']:
                url_link = True
        if self.params.get('writeurllink', False):
            url_link = True
        if self.params.get('writewebloclink', False):
            webloc_link = True
        if self.params.get('writedesktoplink', False):
            desktop_link = True

        if url_link or webloc_link or desktop_link:
            if 'webpage_url' not in info_dict:
                self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
                return
            # ascii_url is read by _write_link_file() below; it is only
            # defined when at least one shortcut type was requested.
            ascii_url = iri_to_uri(info_dict['webpage_url'])

        def _write_link_file(extension, template, newline, embed_filename):
            # Write one shortcut file with the given extension by filling
            # template with the URL (and, if embed_filename is set, the link
            # file name without its extension).
            # Returns False only when writing failed.
            linkfn = replace_extension(filename, extension, info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(linkfn)):
                self.to_screen('[info] Internet shortcut is already present')
            else:
                try:
                    self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                    with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                        template_vars = {'url': ascii_url}
                        if embed_filename:
                            template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                        linkfile.write(template % template_vars)
                except (OSError, IOError):
                    self.report_error('Cannot write internet shortcut ' + linkfn)
                    return False
            return True

        if url_link:
            if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
                return
        if webloc_link:
            if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
                return
        if desktop_link:
            if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
                return

        # Download
        # Set to True only after download, post-processing and post hooks
        # have all completed.
        must_record_download_archive = False
        if not self.params.get('skip_download', False):
            try:
                if info_dict.get('requested_formats') is not None:
                    # Several formats were requested: download each one to
                    # its own file and let the merger postprocessor combine them.
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self)
                    if not merger.available:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged.')
                    else:
                        postprocessors = [merger]

                    def compatible_formats(formats):
                        # Tell whether formats can be merged without switching
                        # the output container to mkv.
                        # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                        video_formats = [format for format in formats if format.get('vcodec') != 'none']
                        audio_formats = [format for format in formats if format.get('acodec') != 'none']
                        if len(video_formats) > 2 or len(audio_formats) > 2:
                            return False

                        # Check extension
                        exts = set(format.get('ext') for format in formats)
                        COMPATIBLE_EXTS = (
                            set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                            set(('webm',)),
                        )
                        for ext_sets in COMPATIBLE_EXTS:
                            if ext_sets.issuperset(exts):
                                return True
                        # TODO: Check acodec/vcodec
                        return False

                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == info_dict['ext']
                        else filename)
                    requested_formats = info_dict['requested_formats']
                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    # Ensure filename always has a correct extension for successful merge
                    filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                    if os.path.exists(encodeFilename(filename)):
                        self.to_screen(
                            '[download] %s has already been downloaded and '
                            'merged' % filename)
                    else:
                        for f in requested_formats:
                            # Each part is saved under the regular file name
                            # with an extra 'f<format_id>' extension component
                            new_info = dict(info_dict)
                            new_info.update(f)
                            fname = prepend_extension(
                                self.prepare_filename(new_info),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not ensure_dir_exists(fname):
                                return
                            downloaded.append(fname)
                            partial_success, real_download = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                        # Even if there were no downloads, it is being merged only now
                        info_dict['__real_download'] = True
                else:
                    # Just a single file
                    success, real_download = dl(filename, info_dict)
                    info_dict['__real_download'] = real_download
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            # Fixups and post-processing are skipped when writing to stdout ('-')
            if success and filename != '-':
                # Fixup content
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'

                INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

                # Each fixup below follows the same policy handling: 'warn'
                # only warns, 'detect_or_warn' queues the fixing postprocessor
                # when it is available and warns otherwise, and the remaining
                # accepted values ('ignore', 'never') do nothing.
                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                        else:
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). %s'
                                % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                if (info_dict.get('requested_formats') is None
                        and info_dict.get('container') == 'm4a_dash'):
                    if fixup_policy == 'warn':
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container.'
                            % info_dict['id'])
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: writing DASH m4a. '
                                'Only some players support this container. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                # 'and' binds tighter than 'or': the condition holds for
                # 'm3u8_native', or for 'm3u8' combined with hls_prefer_native
                if (info_dict.get('protocol') == 'm3u8_native'
                        or info_dict.get('protocol') == 'm3u8'
                        and self.params.get('hls_prefer_native')):
                    if fixup_policy == 'warn':
                        self.report_warning('%s: malformed AAC bitstream detected.' % (
                            info_dict['id']))
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM3u8PP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: malformed AAC bitstream detected. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return
                # Post hooks are called with the final file name
                try:
                    for ph in self._post_hooks:
                        ph(filename)
                except Exception as err:
                    self.report_error('post hooks: %s' % str(err))
                    return
                must_record_download_archive = True

        if must_record_download_archive or self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)
        # Re-check the limit now that this entry has been counted
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None and self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()
2227
2228     def download(self, url_list):
2229         """Download a given list of URLs."""
2230         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2231         if (len(url_list) > 1
2232                 and outtmpl != '-'
2233                 and '%' not in outtmpl
2234                 and self.params.get('max_downloads') != 1):
2235             raise SameFileError(outtmpl)
2236
2237         for url in url_list:
2238             try:
2239                 # It also downloads the videos
2240                 res = self.extract_info(
2241                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2242             except UnavailableVideoError:
2243                 self.report_error('unable to download video')
2244             except MaxDownloadsReached:
2245                 self.to_screen('[info] Maximum number of downloaded files reached.')
2246                 raise
2247             else:
2248                 if self.params.get('dump_single_json', False):
2249                     self.to_stdout(json.dumps(res))
2250
2251         return self._download_retcode
2252
2253     def download_with_info_file(self, info_filename):
2254         with contextlib.closing(fileinput.FileInput(
2255                 [info_filename], mode='r',
2256                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2257             # FileInput doesn't have a read method, we can't call json.load
2258             info = self.filter_requested_info(json.loads('\n'.join(f)))
2259         try:
2260             self.process_ie_result(info, download=True)
2261         except DownloadError:
2262             webpage_url = info.get('webpage_url')
2263             if webpage_url is not None:
2264                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2265                 return self.download([webpage_url])
2266             else:
2267                 raise
2268         return self._download_retcode
2269
2270     @staticmethod
2271     def filter_requested_info(info_dict):
2272         return dict(
2273             (k, v) for k, v in info_dict.items()
2274             if k not in ['requested_formats', 'requested_subtitles'])
2275
2276     def post_process(self, filename, ie_info):
2277         """Run all the postprocessors on the given file."""
2278         info = dict(ie_info)
2279         info['filepath'] = filename
2280         pps_chain = []
2281         if ie_info.get('__postprocessors') is not None:
2282             pps_chain.extend(ie_info['__postprocessors'])
2283         pps_chain.extend(self._pps)
2284         for pp in pps_chain:
2285             files_to_delete = []
2286             try:
2287                 files_to_delete, info = pp.run(info)
2288             except PostProcessingError as e:
2289                 self.report_error(e.msg)
2290             if files_to_delete and not self.params.get('keepvideo', False):
2291                 for old_filename in set(files_to_delete):
2292                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2293                     try:
2294                         os.remove(encodeFilename(old_filename))
2295                     except (IOError, OSError):
2296                         self.report_warning('Unable to remove downloaded original file')
2297
2298     def _make_archive_id(self, info_dict):
2299         video_id = info_dict.get('id')
2300         if not video_id:
2301             return
2302         # Future-proof against any change in case
2303         # and backwards compatibility with prior versions
2304         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2305         if extractor is None:
2306             url = str_or_none(info_dict.get('url'))
2307             if not url:
2308                 return
2309             # Try to find matching extractor for the URL and take its ie_key
2310             for ie in self._ies:
2311                 if ie.suitable(url):
2312                     extractor = ie.ie_key()
2313                     break
2314             else:
2315                 return
2316         return extractor.lower() + ' ' + video_id
2317
2318     def in_download_archive(self, info_dict):
2319         fn = self.params.get('download_archive')
2320         if fn is None:
2321             return False
2322
2323         vid_id = self._make_archive_id(info_dict)
2324         if not vid_id:
2325             return False  # Incomplete video information
2326
2327         return vid_id in self.archive
2328
2329     def record_download_archive(self, info_dict):
2330         fn = self.params.get('download_archive')
2331         if fn is None:
2332             return
2333         vid_id = self._make_archive_id(info_dict)
2334         assert vid_id
2335         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2336             archive_file.write(vid_id + '\n')
2337         self.archive.add(vid_id)
2338
2339     @staticmethod
2340     def format_resolution(format, default='unknown'):
2341         if format.get('vcodec') == 'none':
2342             return 'audio only'
2343         if format.get('resolution') is not None:
2344             return format['resolution']
2345         if format.get('height') is not None:
2346             if format.get('width') is not None:
2347                 res = '%sx%s' % (format['width'], format['height'])
2348             else:
2349                 res = '%sp' % format['height']
2350         elif format.get('width') is not None:
2351             res = '%dx?' % format['width']
2352         else:
2353             res = default
2354         return res
2355
2356     def _format_note(self, fdict):
2357         res = ''
2358         if fdict.get('ext') in ['f4f', 'f4m']:
2359             res += '(unsupported) '
2360         if fdict.get('language'):
2361             if res:
2362                 res += ' '
2363             res += '[%s] ' % fdict['language']
2364         if fdict.get('format_note') is not None:
2365             res += fdict['format_note'] + ' '
2366         if fdict.get('tbr') is not None:
2367             res += '%4dk ' % fdict['tbr']
2368         if fdict.get('container') is not None:
2369             if res:
2370                 res += ', '
2371             res += '%s container' % fdict['container']
2372         if (fdict.get('vcodec') is not None
2373                 and fdict.get('vcodec') != 'none'):
2374             if res:
2375                 res += ', '
2376             res += fdict['vcodec']
2377             if fdict.get('vbr') is not None:
2378                 res += '@'
2379         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2380             res += 'video@'
2381         if fdict.get('vbr') is not None:
2382             res += '%4dk' % fdict['vbr']
2383         if fdict.get('fps') is not None:
2384             if res:
2385                 res += ', '
2386             res += '%sfps' % fdict['fps']
2387         if fdict.get('acodec') is not None:
2388             if res:
2389                 res += ', '
2390             if fdict['acodec'] == 'none':
2391                 res += 'video only'
2392             else:
2393                 res += '%-5s' % fdict['acodec']
2394         elif fdict.get('abr') is not None:
2395             if res:
2396                 res += ', '
2397             res += 'audio'
2398         if fdict.get('abr') is not None:
2399             res += '@%3dk' % fdict['abr']
2400         if fdict.get('asr') is not None:
2401             res += ' (%5dHz)' % fdict['asr']
2402         if fdict.get('filesize') is not None:
2403             if res:
2404                 res += ', '
2405             res += format_bytes(fdict['filesize'])
2406         elif fdict.get('filesize_approx') is not None:
2407             if res:
2408                 res += ', '
2409             res += '~' + format_bytes(fdict['filesize_approx'])
2410         return res
2411
2412     def _format_note_table(self, f):
2413         def join_fields(*vargs):
2414             return ', '.join((val for val in vargs if val != ''))
2415
2416         return join_fields(
2417             'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2418             format_field(f, 'language', '[%s]'),
2419             format_field(f, 'format_note'),
2420             format_field(f, 'container', ignore=(None, f.get('ext'))),
2421             format_field(f, 'asr', '%5dHz'))
2422
2423     def list_formats(self, info_dict):
2424         formats = info_dict.get('formats', [info_dict])
2425         new_format = self.params.get('listformats_table', False)
2426         if new_format:
2427             table = [
2428                 [
2429                     format_field(f, 'format_id'),
2430                     format_field(f, 'ext'),
2431                     self.format_resolution(f),
2432                     format_field(f, 'fps', '%d'),
2433                     '|',
2434                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2435                     format_field(f, 'tbr', '%4dk'),
2436                     f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
2437                     '|',
2438                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
2439                     format_field(f, 'vbr', '%4dk'),
2440                     format_field(f, 'acodec', default='unknown').replace('none', ''),
2441                     format_field(f, 'abr', '%3dk'),
2442                     format_field(f, 'asr', '%5dHz'),
2443                     self._format_note_table(f)]
2444                 for f in formats
2445                 if f.get('preference') is None or f['preference'] >= -1000]
2446             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
2447                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2448         else:
2449             table = [
2450                 [
2451                     format_field(f, 'format_id'),
2452                     format_field(f, 'ext'),
2453                     self.format_resolution(f),
2454                     self._format_note(f)]
2455                 for f in formats
2456                 if f.get('preference') is None or f['preference'] >= -1000]
2457             header_line = ['format code', 'extension', 'resolution', 'note']
2458
2459         # if len(formats) > 1:
2460         #     table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2461         self.to_screen(
2462             '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2463                 header_line,
2464                 table,
2465                 delim=new_format,
2466                 extraGap=(0 if new_format else 1),
2467                 hideEmpty=new_format)))
2468
2469     def list_thumbnails(self, info_dict):
2470         thumbnails = info_dict.get('thumbnails')
2471         if not thumbnails:
2472             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2473             return
2474
2475         self.to_screen(
2476             '[info] Thumbnails for %s:' % info_dict['id'])
2477         self.to_screen(render_table(
2478             ['ID', 'width', 'height', 'URL'],
2479             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2480
2481     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2482         if not subtitles:
2483             self.to_screen('%s has no %s' % (video_id, name))
2484             return
2485         self.to_screen(
2486             'Available %s for %s:' % (name, video_id))
2487         self.to_screen(render_table(
2488             ['Language', 'formats'],
2489             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2490                 for lang, formats in subtitles.items()]))
2491
2492     def urlopen(self, req):
2493         """ Start an HTTP download """
2494         if isinstance(req, compat_basestring):
2495             req = sanitized_Request(req)
2496         return self._opener.open(req, timeout=self._socket_timeout)
2497
2498     def print_debug_header(self):
2499         if not self.params.get('verbose'):
2500             return
2501
2502         if type('') is not compat_str:
2503             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2504             self.report_warning(
2505                 'Your Python is broken! Update to a newer and supported version')
2506
2507         stdout_encoding = getattr(
2508             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2509         encoding_str = (
2510             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2511                 locale.getpreferredencoding(),
2512                 sys.getfilesystemencoding(),
2513                 stdout_encoding,
2514                 self.get_encoding()))
2515         write_string(encoding_str, encoding=None)
2516
2517         self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
2518         if _LAZY_LOADER:
2519             self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2520         try:
2521             sp = subprocess.Popen(
2522                 ['git', 'rev-parse', '--short', 'HEAD'],
2523                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2524                 cwd=os.path.dirname(os.path.abspath(__file__)))
2525             out, err = process_communicate_or_kill(sp)
2526             out = out.decode().strip()
2527             if re.match('[0-9a-f]+', out):
2528                 self._write_string('[debug] Git HEAD: ' + out + '\n')
2529         except Exception:
2530             try:
2531                 sys.exc_clear()
2532             except Exception:
2533                 pass
2534
2535         def python_implementation():
2536             impl_name = platform.python_implementation()
2537             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2538                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2539             return impl_name
2540
2541         self._write_string('[debug] Python version %s (%s) - %s\n' % (
2542             platform.python_version(), python_implementation(),
2543             platform_name()))
2544
2545         exe_versions = FFmpegPostProcessor.get_versions(self)
2546         exe_versions['rtmpdump'] = rtmpdump_version()
2547         exe_versions['phantomjs'] = PhantomJSwrapper._version()
2548         exe_str = ', '.join(
2549             '%s %s' % (exe, v)
2550             for exe, v in sorted(exe_versions.items())
2551             if v
2552         )
2553         if not exe_str:
2554             exe_str = 'none'
2555         self._write_string('[debug] exe versions: %s\n' % exe_str)
2556
2557         proxy_map = {}
2558         for handler in self._opener.handlers:
2559             if hasattr(handler, 'proxies'):
2560                 proxy_map.update(handler.proxies)
2561         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2562
2563         if self.params.get('call_home', False):
2564             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2565             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2566             return
2567             latest_version = self.urlopen(
2568                 'https://yt-dl.org/latest/version').read().decode('utf-8')
2569             if version_tuple(latest_version) > version_tuple(__version__):
2570                 self.report_warning(
2571                     'You are using an outdated version (newest version: %s)! '
2572                     'See https://yt-dl.org/update if you need help updating.' %
2573                     latest_version)
2574
2575     def _setup_opener(self):
2576         timeout_val = self.params.get('socket_timeout')
2577         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2578
2579         opts_cookiefile = self.params.get('cookiefile')
2580         opts_proxy = self.params.get('proxy')
2581
2582         if opts_cookiefile is None:
2583             self.cookiejar = compat_cookiejar.CookieJar()
2584         else:
2585             opts_cookiefile = expand_path(opts_cookiefile)
2586             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2587             if os.access(opts_cookiefile, os.R_OK):
2588                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2589
2590         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2591         if opts_proxy is not None:
2592             if opts_proxy == '':
2593                 proxies = {}
2594             else:
2595                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2596         else:
2597             proxies = compat_urllib_request.getproxies()
2598             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2599             if 'http' in proxies and 'https' not in proxies:
2600                 proxies['https'] = proxies['http']
2601         proxy_handler = PerRequestProxyHandler(proxies)
2602
2603         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2604         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2605         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2606         redirect_handler = YoutubeDLRedirectHandler()
2607         data_handler = compat_urllib_request_DataHandler()
2608
2609         # When passing our own FileHandler instance, build_opener won't add the
2610         # default FileHandler and allows us to disable the file protocol, which
2611         # can be used for malicious purposes (see
2612         # https://github.com/ytdl-org/youtube-dl/issues/8227)
2613         file_handler = compat_urllib_request.FileHandler()
2614
2615         def file_open(*args, **kwargs):
2616             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
2617         file_handler.file_open = file_open
2618
2619         opener = compat_urllib_request.build_opener(
2620             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2621
2622         # Delete the default user-agent header, which would otherwise apply in
2623         # cases where our custom HTTP handler doesn't come into play
2624         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2625         opener.addheaders = []
2626         self._opener = opener
2627
2628     def encode(self, s):
2629         if isinstance(s, bytes):
2630             return s  # Already encoded
2631
2632         try:
2633             return s.encode(self.get_encoding())
2634         except UnicodeEncodeError as err:
2635             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2636             raise
2637
2638     def get_encoding(self):
2639         encoding = self.params.get('encoding')
2640         if encoding is None:
2641             encoding = preferredencoding()
2642         return encoding
2643
2644     def _write_thumbnails(self, info_dict, filename):
2645         if self.params.get('writethumbnail', False):
2646             thumbnails = info_dict.get('thumbnails')
2647             if thumbnails:
2648                 thumbnails = [thumbnails[-1]]
2649         elif self.params.get('write_all_thumbnails', False):
2650             thumbnails = info_dict.get('thumbnails')
2651         else:
2652             return
2653
2654         if not thumbnails:
2655             # No thumbnails present, so return immediately
2656             return
2657
2658         for t in thumbnails:
2659             thumb_ext = determine_ext(t['url'], 'jpg')
2660             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2661             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2662             t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
2663
2664             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2665                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2666                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2667             else:
2668                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2669                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2670                 try:
2671                     uf = self.urlopen(t['url'])
2672                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2673                         shutil.copyfileobj(uf, thumbf)
2674                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2675                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2676                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2677                     self.report_warning('Unable to download thumbnail "%s": %s' %
2678                                         (t['url'], error_to_compat_str(err)))