youtube_dlc/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import socket
  23 import sys
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from string import ascii_letters
  30
  31 from .compat import (
  32     compat_basestring,
  33     compat_cookiejar,
  34     compat_get_terminal_size,
  35     compat_http_client,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_str,
  40     compat_tokenize_tokenize,
  41     compat_urllib_error,
  42     compat_urllib_request,
  43     compat_urllib_request_DataHandler,
  44 )
  45 from .utils import (
  46     age_restricted,
  47     args_to_str,
  48     ContentTooShortError,
  49     date_from_str,
  50     DateRange,
  51     DEFAULT_OUTTMPL,
  52     determine_ext,
  53     determine_protocol,
  54     DOT_DESKTOP_LINK_TEMPLATE,
  55     DOT_URL_LINK_TEMPLATE,
  56     DOT_WEBLOC_LINK_TEMPLATE,
  57     DownloadError,
  58     encode_compat_str,
  59     encodeFilename,
  60     error_to_compat_str,
  61     ExistingVideoReached,
  62     expand_path,
  63     ExtractorError,
  64     format_bytes,
  65     format_field,
  66     formatSeconds,
  67     GeoRestrictedError,
  68     int_or_none,
  69     iri_to_uri,
  70     ISO3166Utils,
  71     locked_file,
  72     make_HTTPS_handler,
  73     MaxDownloadsReached,
  74     orderedSet,
  75     PagedList,
  76     parse_filesize,
  77     PerRequestProxyHandler,
  78     platform_name,
  79     PostProcessingError,
  80     preferredencoding,
  81     prepend_extension,
  82     register_socks_protocols,
  83     render_table,
  84     replace_extension,
  85     RejectedVideoReached,
  86     SameFileError,
  87     sanitize_filename,
  88     sanitize_path,
  89     sanitize_url,
  90     sanitized_Request,
  91     std_headers,
  92     str_or_none,
  93     subtitles_filename,
  94     to_high_limit_path,
  95     UnavailableVideoError,
  96     url_basename,
  97     version_tuple,
  98     write_json_file,
  99     write_string,
 100     YoutubeDLCookieJar,
 101     YoutubeDLCookieProcessor,
 102     YoutubeDLHandler,
 103     YoutubeDLRedirectHandler,
 104     process_communicate_or_kill,
 105 )
 106 from .cache import Cache
 107 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
 108 from .extractor.openload import PhantomJSwrapper
 109 from .downloader import get_suitable_downloader
 110 from .downloader.rtmp import rtmpdump_version
 111 from .postprocessor import (
 112     FFmpegFixupM3u8PP,
 113     FFmpegFixupM4aPP,
 114     FFmpegFixupStretchedPP,
 115     FFmpegMergerPP,
 116     FFmpegPostProcessor,
 117     FFmpegSubtitlesConvertorPP,
 118     get_postprocessor,
 119 )
 120 from .version import __version__
 121
 122 if compat_os_name == 'nt':
 123     import ctypes
 124
 125
 126 class YoutubeDL(object):
 127     """YoutubeDL class.
 128
  129     YoutubeDL objects are the ones responsible for downloading the
 130     actual video file and writing it to disk if the user has requested
 131     it, among some other tasks. In most cases there should be one per
 132     program. As, given a video URL, the downloader doesn't know how to
 133     extract all the needed information, task that InfoExtractors do, it
 134     has to pass the URL to one of them.
 135
 136     For this, YoutubeDL objects have a method that allows
 137     InfoExtractors to be registered in a given order. When it is passed
  138     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 139     finds that reports being able to handle it. The InfoExtractor extracts
 140     all the information about the video or videos the URL refers to, and
  141     YoutubeDL processes the extracted information, possibly using a File
 142     Downloader to download the video.
 143
 144     YoutubeDL objects accept a lot of parameters. In order not to saturate
 145     the object constructor with arguments, it receives a dictionary of
 146     options instead. These options are available through the params
 147     attribute for the InfoExtractors to use. The YoutubeDL also
 148     registers itself as the downloader in charge for the InfoExtractors
 149     that are added to it, so this is a "mutual registration".
 150
 151     Available options:
 152
 153     username:          Username for authentication purposes.
 154     password:          Password for authentication purposes.
 155     videopassword:     Password for accessing a video.
 156     ap_mso:            Adobe Pass multiple-system operator identifier.
 157     ap_username:       Multiple-system operator account username.
 158     ap_password:       Multiple-system operator account password.
 159     usenetrc:          Use netrc for authentication instead.
 160     verbose:           Print additional info to stdout.
 161     quiet:             Do not print messages to stdout.
 162     no_warnings:       Do not print out anything for warnings.
 163     forceurl:          Force printing final URL.
 164     forcetitle:        Force printing title.
 165     forceid:           Force printing ID.
 166     forcethumbnail:    Force printing thumbnail URL.
 167     forcedescription:  Force printing description.
 168     forcefilename:     Force printing final filename.
 169     forceduration:     Force printing duration.
 170     forcejson:         Force printing info_dict as JSON.
 171     dump_single_json:  Force printing the info_dict of the whole playlist
 172                        (or video) as a single JSON line.
 173     force_write_download_archive: Force writing download archive regardless of
 174                        'skip_download' or 'simulate'.
 175     simulate:          Do not download the video files.
 176     format:            Video format code. see "FORMAT SELECTION" for more details.
 177     format_sort:       How to sort the video formats. see "Sorting Formats" for more details.
 178     format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
 179     allow_multiple_video_streams:   Allow multiple video streams to be merged into a single file
 180     allow_multiple_audio_streams:   Allow multiple audio streams to be merged into a single file
 181     outtmpl:           Template for output names.
 182     restrictfilenames: Do not allow "&" and spaces in file names.
 183     trim_file_name:    Limit length of filename (extension excluded).
 184     ignoreerrors:      Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
 185     force_generic_extractor: Force downloader to use the generic extractor
 186     overwrites:        Overwrite all video and metadata files if True,
 187                        overwrite only non-video files if None
 188                        and don't overwrite any file if False
 189     playliststart:     Playlist item to start at.
 190     playlistend:       Playlist item to end at.
 191     playlist_items:    Specific indices of playlist to download.
 192     playlistreverse:   Download playlist items in reverse order.
 193     playlistrandom:    Download playlist items in random order.
 194     matchtitle:        Download only matching titles.
 195     rejecttitle:       Reject downloads for matching titles.
 196     logger:            Log messages to a logging.Logger instance.
 197     logtostderr:       Log messages to stderr instead of stdout.
 198     writedescription:  Write the video description to a .description file
 199     writeinfojson:     Write the video description to a .info.json file
 200     writeannotations:  Write the video annotations to a .annotations.xml file
 201     writethumbnail:    Write the thumbnail image to a file
 202     write_all_thumbnails:  Write all thumbnail formats to files
 203     writelink:         Write an internet shortcut file, depending on the
 204                        current platform (.url/.webloc/.desktop)
 205     writeurllink:      Write a Windows internet shortcut file (.url)
 206     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 207     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 208     writesubtitles:    Write the video subtitles to a file
 209     writeautomaticsub: Write the automatically generated subtitles to a file
 210     allsubtitles:      Downloads all the subtitles of the video
 211                        (requires writesubtitles or writeautomaticsub)
 212     listsubtitles:     Lists all available subtitles for the video
 213     subtitlesformat:   The format code for subtitles
 214     subtitleslangs:    List of languages of the subtitles to download
 215     keepvideo:         Keep the video file after post-processing
 216     daterange:         A DateRange object, download only if the upload_date is in the range.
 217     skip_download:     Skip the actual download of the video file
 218     cachedir:          Location of the cache files in the filesystem.
 219                        False to disable filesystem cache.
 220     noplaylist:        Download single video instead of a playlist if in doubt.
 221     age_limit:         An integer representing the user's age in years.
 222                        Unsuitable videos for the given age are skipped.
 223     min_views:         An integer representing the minimum view count the video
 224                        must have in order to not be skipped.
 225                        Videos without view count information are always
 226                        downloaded. None for no limit.
 227     max_views:         An integer representing the maximum view count.
 228                        Videos that are more popular than that are not
 229                        downloaded.
 230                        Videos without view count information are always
 231                        downloaded. None for no limit.
 232     download_archive:  File name of a file where all downloads are recorded.
 233                        Videos already present in the file are not downloaded
 234                        again.
 235     break_on_existing: Stop the download process after attempting to download a file that's
 236                        in the archive.
 237     break_on_reject:   Stop the download process when encountering a video that has been filtered out.
 238     cookiefile:        File name where cookies should be read from and dumped to.
 239     nocheckcertificate:Do not verify SSL certificates
 240     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 241                        At the moment, this is only supported by YouTube.
 242     proxy:             URL of the proxy server to use
 243     geo_verification_proxy:  URL of the proxy to use for IP address verification
 244                        on geo-restricted sites.
 245     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 246     bidi_workaround:   Work around buggy terminals without bidirectional text
  247                        support, using fribidi
 248     debug_printtraffic:Print out sent and received HTTP traffic
 249     include_ads:       Download ads as well
 250     default_search:    Prepend this string if an input url is not valid.
 251                        'auto' for elaborate guessing
 252     encoding:          Use this encoding instead of the system-specified.
 253     extract_flat:      Do not resolve URLs, return the immediate result.
 254                        Pass in 'in_playlist' to only show this behavior for
 255                        playlist items.
 256     postprocessors:    A list of dictionaries, each with an entry
 257                        * key:  The name of the postprocessor. See
 258                                youtube_dlc/postprocessor/__init__.py for a list.
 259                        as well as any further keyword arguments for the
 260                        postprocessor.
 261     post_hooks:        A list of functions that get called as the final step
 262                        for each video file, after all postprocessors have been
 263                        called. The filename will be passed as the only argument.
 264     progress_hooks:    A list of functions that get called on download
 265                        progress, with a dictionary with the entries
 266                        * status: One of "downloading", "error", or "finished".
 267                                  Check this first and ignore unknown values.
 268
 269                        If status is one of "downloading", or "finished", the
 270                        following properties may also be present:
 271                        * filename: The final filename (always present)
 272                        * tmpfilename: The filename we're currently writing to
 273                        * downloaded_bytes: Bytes on disk
 274                        * total_bytes: Size of the whole file, None if unknown
 275                        * total_bytes_estimate: Guess of the eventual file size,
 276                                                None if unavailable.
 277                        * elapsed: The number of seconds since download started.
 278                        * eta: The estimated time in seconds, None if unknown
 279                        * speed: The download speed in bytes/second, None if
 280                                 unknown
 281                        * fragment_index: The counter of the currently
 282                                          downloaded video fragment.
 283                        * fragment_count: The number of fragments (= individual
 284                                          files that will be merged)
 285
 286                        Progress hooks are guaranteed to be called at least once
 287                        (with status "finished") if the download is successful.
 288     merge_output_format: Extension to use when merging formats.
 289     fixup:             Automatically correct known faults of the file.
 290                        One of:
 291                        - "never": do nothing
 292                        - "warn": only emit a warning
 293                        - "detect_or_warn": check whether we can do anything
 294                                            about it, warn otherwise (default)
 295     source_address:    Client-side IP address to bind to.
 296     call_home:         Boolean, true iff we are allowed to contact the
 297                        youtube-dlc servers for debugging.
 298     sleep_interval:    Number of seconds to sleep before each download when
 299                        used alone or a lower bound of a range for randomized
 300                        sleep before each download (minimum possible number
 301                        of seconds to sleep) when used along with
 302                        max_sleep_interval.
 303     max_sleep_interval:Upper bound of a range for randomized sleep before each
 304                        download (maximum possible number of seconds to sleep).
 305                        Must only be used along with sleep_interval.
 306                        Actual sleep time will be a random float from range
 307                        [sleep_interval; max_sleep_interval].
 308     listformats:       Print an overview of available video formats and exit.
 309     list_thumbnails:   Print a table of all thumbnails and exit.
 310     match_filter:      A function that gets called with the info_dict of
 311                        every video.
 312                        If it returns a message, the video is ignored.
 313                        If it returns None, the video is downloaded.
 314                        match_filter_func in utils.py is one example for this.
 315     no_color:          Do not emit color codes in output.
 316     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 317                        HTTP header
 318     geo_bypass_country:
 319                        Two-letter ISO 3166-2 country code that will be used for
 320                        explicit geographic restriction bypassing via faking
 321                        X-Forwarded-For HTTP header
 322     geo_bypass_ip_block:
 323                        IP range in CIDR notation that will be used similarly to
 324                        geo_bypass_country
 325
 326     The following options determine which downloader is picked:
 327     external_downloader: Executable of the external downloader to call.
 328                        None or unset for standard (built-in) downloader.
 329     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
 330                        if True, otherwise use ffmpeg/avconv if False, otherwise
 331                        use downloader suggested by extractor if None.
 332
 333     The following parameters are not used by YoutubeDL itself, they are used by
 334     the downloader (see youtube_dlc/downloader/common.py):
 335     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 336     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 337     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 338     http_chunk_size.
 339
 340     The following options are used by the post processors:
 341     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 342                        otherwise prefer ffmpeg.
 343     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 344                        to the binary or its containing directory.
 345     postprocessor_args: A dictionary of postprocessor names (in lower case) and a list
 346                         of additional command-line arguments for the postprocessor.
  347                         Use 'default' as the name for arguments to be passed to all PP.
 348
 349     The following options are used by the Youtube extractor:
 350     youtube_include_dash_manifest: If True (default), DASH manifests and related
 351                         data will be downloaded and processed by extractor.
 352                         You can reduce network I/O by disabling it if you don't
 353                         care about DASH.
 354     """
 355
    # Names of fields that are treated as numeric.
    # NOTE(review): the code that consults this set is outside this view;
    # presumably it is used when formatting info_dict values - confirm.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level defaults. __init__ rebinds params, _ies, _pps,
    # _download_retcode, _num_downloads and _screen_file on every instance.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    # Playlist bookkeeping. Beware that a mutable object stored on the class
    # is shared by every instance that does not rebind the attribute itself.
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
 375
 376     def __init__(self, params=None, auto_init=True):
 377         """Create a FileDownloader object with the given options."""
 378         if params is None:
 379             params = {}
 380         self._ies = []
 381         self._ies_instances = {}
 382         self._pps = []
 383         self._post_hooks = []
 384         self._progress_hooks = []
 385         self._download_retcode = 0
 386         self._num_downloads = 0
 387         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 388         self._err_file = sys.stderr
 389         self.params = {
 390             # Default parameters
 391             'nocheckcertificate': False,
 392         }
 393         self.params.update(params)
 394         self.cache = Cache(self)
 395         self.archive = set()
 396
 397         """Preload the archive, if any is specified"""
 398         def preload_download_archive(self):
 399             fn = self.params.get('download_archive')
 400             if fn is None:
 401                 return False
 402             try:
 403                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 404                     for line in archive_file:
 405                         self.archive.add(line.strip())
 406             except IOError as ioe:
 407                 if ioe.errno != errno.ENOENT:
 408                     raise
 409                 return False
 410             return True
 411
 412         def check_deprecated(param, option, suggestion):
 413             if self.params.get(param) is not None:
 414                 self.report_warning(
 415                     '%s is deprecated. Use %s instead.' % (option, suggestion))
 416                 return True
 417             return False
 418
 419         if self.params.get('verbose'):
 420             self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
 421
 422         preload_download_archive(self)
 423
 424         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 425             if self.params.get('geo_verification_proxy') is None:
 426                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 427
 428         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
 429         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 430         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 431
 432         if params.get('bidi_workaround', False):
 433             try:
 434                 import pty
 435                 master, slave = pty.openpty()
 436                 width = compat_get_terminal_size().columns
 437                 if width is None:
 438                     width_args = []
 439                 else:
 440                     width_args = ['-w', str(width)]
 441                 sp_kwargs = dict(
 442                     stdin=subprocess.PIPE,
 443                     stdout=slave,
 444                     stderr=self._err_file)
 445                 try:
 446                     self._output_process = subprocess.Popen(
 447                         ['bidiv'] + width_args, **sp_kwargs
 448                     )
 449                 except OSError:
 450                     self._output_process = subprocess.Popen(
 451                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 452                 self._output_channel = os.fdopen(master, 'rb')
 453             except OSError as ose:
 454                 if ose.errno == errno.ENOENT:
 455                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 456                 else:
 457                     raise
 458
 459         if (sys.platform != 'win32'
 460                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 461                 and not params.get('restrictfilenames', False)):
 462             # Unicode filesystem API will throw errors (#1474, #13027)
 463             self.report_warning(
 464                 'Assuming --restrict-filenames since file system encoding '
 465                 'cannot encode all characters. '
 466                 'Set the LC_ALL environment variable to fix this.')
 467             self.params['restrictfilenames'] = True
 468
 469         if isinstance(params.get('outtmpl'), bytes):
 470             self.report_warning(
 471                 'Parameter outtmpl is bytes, but should be a unicode string. '
 472                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 473
 474         self._setup_opener()
 475
 476         if auto_init:
 477             self.print_debug_header()
 478             self.add_default_info_extractors()
 479
 480         for pp_def_raw in self.params.get('postprocessors', []):
 481             pp_class = get_postprocessor(pp_def_raw['key'])
 482             pp_def = dict(pp_def_raw)
 483             del pp_def['key']
 484             pp = pp_class(self, **compat_kwargs(pp_def))
 485             self.add_post_processor(pp)
 486
 487         for ph in self.params.get('post_hooks', []):
 488             self.add_post_hook(ph)
 489
 490         for ph in self.params.get('progress_hooks', []):
 491             self.add_progress_hook(ph)
 492
 493         register_socks_protocols()
 494
 495     def warn_if_short_id(self, argv):
 496         # short YouTube ID starting with dash?
 497         idxs = [
 498             i for i, a in enumerate(argv)
 499             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 500         if idxs:
 501             correct_argv = (
 502                 ['youtube-dlc']
 503                 + [a for i, a in enumerate(argv) if i not in idxs]
 504                 + ['--'] + [argv[i] for i in idxs]
 505             )
 506             self.report_warning(
 507                 'Long argument string detected. '
 508                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 509                 args_to_str(correct_argv))
 510
 511     def add_info_extractor(self, ie):
 512         """Add an InfoExtractor object to the end of the list."""
 513         self._ies.append(ie)
 514         if not isinstance(ie, type):
 515             self._ies_instances[ie.ie_key()] = ie
 516             ie.set_downloader(self)
 517
 518     def get_info_extractor(self, ie_key):
 519         """
 520         Get an instance of an IE with name ie_key, it will try to get one from
 521         the _ies list, if there's no instance it will create a new one and add
 522         it to the extractor list.
 523         """
 524         ie = self._ies_instances.get(ie_key)
 525         if ie is None:
 526             ie = get_info_extractor(ie_key)()
 527             self.add_info_extractor(ie)
 528         return ie
 529
 530     def add_default_info_extractors(self):
 531         """
 532         Add the InfoExtractors returned by gen_extractors to the end of the list
 533         """
 534         for ie in gen_extractor_classes():
 535             self.add_info_extractor(ie)
 536
 537     def add_post_processor(self, pp):
 538         """Add a PostProcessor object to the end of the chain."""
 539         self._pps.append(pp)
 540         pp.set_downloader(self)
 541
 542     def add_post_hook(self, ph):
 543         """Add the post hook"""
 544         self._post_hooks.append(ph)
 545
 546     def add_progress_hook(self, ph):
 547         """Add the progress hook (currently only for the file downloader)"""
 548         self._progress_hooks.append(ph)
 549
 550     def _bidi_workaround(self, message):
 551         if not hasattr(self, '_output_channel'):
 552             return message
 553
 554         assert hasattr(self, '_output_process')
 555         assert isinstance(message, compat_str)
 556         line_count = message.count('\n') + 1
 557         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 558         self._output_process.stdin.flush()
 559         res = ''.join(self._output_channel.readline().decode('utf-8')
 560                       for _ in range(line_count))
 561         return res[:-len('\n')]
 562
 563     def to_screen(self, message, skip_eol=False):
 564         """Print message to stdout if not in quiet mode."""
 565         return self.to_stdout(message, skip_eol, check_quiet=True)
 566
 567     def _write_string(self, s, out=None):
 568         write_string(s, out=out, encoding=self.params.get('encoding'))
 569
 570     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 571         """Print message to stdout if not in quiet mode."""
 572         if self.params.get('logger'):
 573             self.params['logger'].debug(message)
 574         elif not check_quiet or not self.params.get('quiet', False):
 575             message = self._bidi_workaround(message)
 576             terminator = ['\n', ''][skip_eol]
 577             output = message + terminator
 578
 579             self._write_string(output, self._screen_file)
 580
 581     def to_stderr(self, message):
 582         """Print message to stderr."""
 583         assert isinstance(message, compat_str)
 584         if self.params.get('logger'):
 585             self.params['logger'].error(message)
 586         else:
 587             message = self._bidi_workaround(message)
 588             output = message + '\n'
 589             self._write_string(output, self._err_file)
 590
 591     def to_console_title(self, message):
 592         if not self.params.get('consoletitle', False):
 593             return
 594         if compat_os_name == 'nt':
 595             if ctypes.windll.kernel32.GetConsoleWindow():
 596                 # c_wchar_p() might not be necessary if `message` is
 597                 # already of type unicode()
 598                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 599         elif 'TERM' in os.environ:
 600             self._write_string('\033[0;%s\007' % message, self._screen_file)
 601
 602     def save_console_title(self):
 603         if not self.params.get('consoletitle', False):
 604             return
 605         if self.params.get('simulate', False):
 606             return
 607         if compat_os_name != 'nt' and 'TERM' in os.environ:
 608             # Save the title on stack
 609             self._write_string('\033[22;0t', self._screen_file)
 610
 611     def restore_console_title(self):
 612         if not self.params.get('consoletitle', False):
 613             return
 614         if self.params.get('simulate', False):
 615             return
 616         if compat_os_name != 'nt' and 'TERM' in os.environ:
 617             # Restore the title from stack
 618             self._write_string('\033[23;0t', self._screen_file)
 619
 620     def __enter__(self):
 621         self.save_console_title()
 622         return self
 623
 624     def __exit__(self, *args):
 625         self.restore_console_title()
 626
 627         if self.params.get('cookiefile') is not None:
 628             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 629
 630     def trouble(self, message=None, tb=None):
 631         """Determine action to take when a download problem appears.
 632
 633         Depending on if the downloader has been configured to ignore
 634         download errors or not, this method may throw an exception or
 635         not when errors are found, after printing the message.
 636
 637         tb, if given, is additional traceback information.
 638         """
 639         if message is not None:
 640             self.to_stderr(message)
 641         if self.params.get('verbose'):
 642             if tb is None:
 643                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 644                     tb = ''
 645                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 646                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 647                     tb += encode_compat_str(traceback.format_exc())
 648                 else:
 649                     tb_data = traceback.format_list(traceback.extract_stack())
 650                     tb = ''.join(tb_data)
 651             self.to_stderr(tb)
 652         if not self.params.get('ignoreerrors', False):
 653             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 654                 exc_info = sys.exc_info()[1].exc_info
 655             else:
 656                 exc_info = sys.exc_info()
 657             raise DownloadError(message, exc_info)
 658         self._download_retcode = 1
 659
 660     def report_warning(self, message):
 661         '''
 662         Print the message to stderr, it will be prefixed with 'WARNING:'
 663         If stderr is a tty file the 'WARNING:' will be colored
 664         '''
 665         if self.params.get('logger') is not None:
 666             self.params['logger'].warning(message)
 667         else:
 668             if self.params.get('no_warnings'):
 669                 return
 670             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 671                 _msg_header = '\033[0;33mWARNING:\033[0m'
 672             else:
 673                 _msg_header = 'WARNING:'
 674             warning_message = '%s %s' % (_msg_header, message)
 675             self.to_stderr(warning_message)
 676
 677     def report_error(self, message, tb=None):
 678         '''
 679         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 680         in red if stderr is a tty file.
 681         '''
 682         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 683             _msg_header = '\033[0;31mERROR:\033[0m'
 684         else:
 685             _msg_header = 'ERROR:'
 686         error_message = '%s %s' % (_msg_header, message)
 687         self.trouble(error_message, tb)
 688
 689     def report_file_already_downloaded(self, file_name):
 690         """Report file has already been fully downloaded."""
 691         try:
 692             self.to_screen('[download] %s has already been downloaded' % file_name)
 693         except UnicodeEncodeError:
 694             self.to_screen('[download] The file has already been downloaded')
 695
 696     def report_file_delete(self, file_name):
 697         """Report that existing file will be deleted."""
 698         try:
 699             self.to_screen('Deleting already existent file %s' % file_name)
 700         except UnicodeEncodeError:
 701             self.to_screen('Deleting already existent file')
 702
 703     def prepare_filename(self, info_dict):
 704         """Generate the output filename."""
 705         try:
 706             template_dict = dict(info_dict)
 707
 708             template_dict['epoch'] = int(time.time())
 709             autonumber_size = self.params.get('autonumber_size')
 710             if autonumber_size is None:
 711                 autonumber_size = 5
 712             template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 713             if template_dict.get('resolution') is None:
 714                 if template_dict.get('width') and template_dict.get('height'):
 715                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 716                 elif template_dict.get('height'):
 717                     template_dict['resolution'] = '%sp' % template_dict['height']
 718                 elif template_dict.get('width'):
 719                     template_dict['resolution'] = '%dx?' % template_dict['width']
 720
 721             sanitize = lambda k, v: sanitize_filename(
 722                 compat_str(v),
 723                 restricted=self.params.get('restrictfilenames'),
 724                 is_id=(k == 'id' or k.endswith('_id')))
 725             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
 726                                  for k, v in template_dict.items()
 727                                  if v is not None and not isinstance(v, (list, tuple, dict)))
 728             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 729
 730             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 731
 732             # For fields playlist_index and autonumber convert all occurrences
 733             # of %(field)s to %(field)0Nd for backward compatibility
 734             field_size_compat_map = {
 735                 'playlist_index': len(str(template_dict['n_entries'])),
 736                 'autonumber': autonumber_size,
 737             }
 738             FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
 739             mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
 740             if mobj:
 741                 outtmpl = re.sub(
 742                     FIELD_SIZE_COMPAT_RE,
 743                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
 744                     outtmpl)
 745
 746             # Missing numeric fields used together with integer presentation types
 747             # in format specification will break the argument substitution since
 748             # string 'NA' is returned for missing fields. We will patch output
 749             # template for missing fields to meet string presentation type.
 750             for numeric_field in self._NUMERIC_FIELDS:
 751                 if numeric_field not in template_dict:
 752                     # As of [1] format syntax is:
 753                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
 754                     # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
 755                     FORMAT_RE = r'''(?x)
 756                         (?<!%)
 757                         %
 758                         \({0}\)  # mapping key
 759                         (?:[#0\-+ ]+)?  # conversion flags (optional)
 760                         (?:\d+)?  # minimum field width (optional)
 761                         (?:\.\d+)?  # precision (optional)
 762                         [hlL]?  # length modifier (optional)
 763                         [diouxXeEfFgGcrs%]  # conversion type
 764                     '''
 765                     outtmpl = re.sub(
 766                         FORMAT_RE.format(numeric_field),
 767                         r'%({0})s'.format(numeric_field), outtmpl)
 768
 769             # expand_path translates '%%' into '%' and '$$' into '$'
 770             # correspondingly that is not what we want since we need to keep
 771             # '%%' intact for template dict substitution step. Working around
 772             # with boundary-alike separator hack.
 773             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 774             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 775
 776             # outtmpl should be expand_path'ed before template dict substitution
 777             # because meta fields may contain env variables we don't want to
 778             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 779             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 780             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 781
 782             # https://github.com/blackjack4494/youtube-dlc/issues/85
 783             trim_file_name = self.params.get('trim_file_name', False)
 784             if trim_file_name:
 785                 fn_groups = filename.rsplit('.')
 786                 ext = fn_groups[-1]
 787                 sub_ext = ''
 788                 if len(fn_groups) > 2:
 789                     sub_ext = fn_groups[-2]
 790                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
 791
 792             # Temporary fix for #4787
 793             # 'Treat' all problem characters by passing filename through preferredencoding
 794             # to workaround encoding issues with subprocess on python2 @ Windows
 795             if sys.version_info < (3, 0) and sys.platform == 'win32':
 796                 filename = encodeFilename(filename, True).decode(preferredencoding())
 797             return sanitize_path(filename)
 798         except ValueError as err:
 799             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 800             return None
 801
 802     def _match_entry(self, info_dict, incomplete):
 803         """ Returns None if the file should be downloaded """
 804
 805         def check_filter():
 806             video_title = info_dict.get('title', info_dict.get('id', 'video'))
 807             if 'title' in info_dict:
 808                 # This can happen when we're just evaluating the playlist
 809                 title = info_dict['title']
 810                 matchtitle = self.params.get('matchtitle', False)
 811                 if matchtitle:
 812                     if not re.search(matchtitle, title, re.IGNORECASE):
 813                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 814                 rejecttitle = self.params.get('rejecttitle', False)
 815                 if rejecttitle:
 816                     if re.search(rejecttitle, title, re.IGNORECASE):
 817                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 818             date = info_dict.get('upload_date')
 819             if date is not None:
 820                 dateRange = self.params.get('daterange', DateRange())
 821                 if date not in dateRange:
 822                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 823             view_count = info_dict.get('view_count')
 824             if view_count is not None:
 825                 min_views = self.params.get('min_views')
 826                 if min_views is not None and view_count < min_views:
 827                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 828                 max_views = self.params.get('max_views')
 829                 if max_views is not None and view_count > max_views:
 830                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 831             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 832                 return 'Skipping "%s" because it is age restricted' % video_title
 833             if self.in_download_archive(info_dict):
 834                 return '%s has already been recorded in archive' % video_title
 835
 836             if not incomplete:
 837                 match_filter = self.params.get('match_filter')
 838                 if match_filter is not None:
 839                     ret = match_filter(info_dict)
 840                     if ret is not None:
 841                         return ret
 842             return None
 843
 844         reason = check_filter()
 845         if reason is not None:
 846             self.to_screen('[download] ' + reason)
 847             if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing', False):
 848                 raise ExistingVideoReached()
 849             elif self.params.get('break_on_reject', False):
 850                 raise RejectedVideoReached()
 851         return reason
 852
 853     @staticmethod
 854     def add_extra_info(info_dict, extra_info):
 855         '''Set the keys from extra_info in info dict if they are missing'''
 856         for key, value in extra_info.items():
 857             info_dict.setdefault(key, value)
 858
 859     def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
 860                      process=True, force_generic_extractor=False):
 861         '''
 862         Returns a list with a dictionary for each video we find.
 863         If 'download', also downloads the videos.
 864         extra_info is a dict containing the extra values to add to each result
 865         '''
 866
 867         if not ie_key and force_generic_extractor:
 868             ie_key = 'Generic'
 869
 870         if ie_key:
 871             ies = [self.get_info_extractor(ie_key)]
 872         else:
 873             ies = self._ies
 874
 875         for ie in ies:
 876             if not ie.suitable(url):
 877                 continue
 878
 879             ie_key = ie.ie_key()
 880             ie = self.get_info_extractor(ie_key)
 881             if not ie.working():
 882                 self.report_warning('The program functionality for this site has been marked as broken, '
 883                                     'and will probably not work.')
 884
 885             try:
 886                 temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
 887             except (AssertionError, IndexError, AttributeError):
 888                 temp_id = None
 889             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
 890                 self.to_screen("[%s] %s: has already been recorded in archive" % (
 891                                ie_key, temp_id))
 892                 break
 893
 894             return self.__extract_info(url, ie, download, extra_info, process, info_dict)
 895
 896         else:
 897             self.report_error('no suitable InfoExtractor for URL %s' % url)
 898
 899     def __handle_extraction_exceptions(func):
 900         def wrapper(self, *args, **kwargs):
 901             try:
 902                 return func(self, *args, **kwargs)
 903             except GeoRestrictedError as e:
 904                 msg = e.msg
 905                 if e.countries:
 906                     msg += '\nThis video is available in %s.' % ', '.join(
 907                         map(ISO3166Utils.short2full, e.countries))
 908                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
 909                 self.report_error(msg)
 910             except ExtractorError as e:  # An error we somewhat expected
 911                 self.report_error(compat_str(e), e.format_traceback())
 912             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
 913                 raise
 914             except Exception as e:
 915                 if self.params.get('ignoreerrors', False):
 916                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
 917                 else:
 918                     raise
 919         return wrapper
 920
 921     @__handle_extraction_exceptions
 922     def __extract_info(self, url, ie, download, extra_info, process, info_dict):
 923         ie_result = ie.extract(url)
 924         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 925             return
 926         if isinstance(ie_result, list):
 927             # Backwards compatibility: old IE result format
 928             ie_result = {
 929                 '_type': 'compat_list',
 930                 'entries': ie_result,
 931             }
 932         if info_dict:
 933             if info_dict.get('id'):
 934                 ie_result['id'] = info_dict['id']
 935             if info_dict.get('title'):
 936                 ie_result['title'] = info_dict['title']
 937         self.add_default_extra_info(ie_result, ie, url)
 938         if process:
 939             return self.process_ie_result(ie_result, download, extra_info)
 940         else:
 941             return ie_result
 942
 943     def add_default_extra_info(self, ie_result, ie, url):
 944         self.add_extra_info(ie_result, {
 945             'extractor': ie.IE_NAME,
 946             'webpage_url': url,
 947             'duration_string': (
 948                 formatSeconds(ie_result['duration'], '-')
 949                 if ie_result.get('duration', None) is not None
 950                 else None),
 951             'webpage_url_basename': url_basename(url),
 952             'extractor_key': ie.ie_key(),
 953         })
 954
 955     def process_ie_result(self, ie_result, download=True, extra_info={}):
 956         """
 957         Take the result of the ie(may be modified) and resolve all unresolved
 958         references (URLs, playlist items).
 959
 960         It will also download the videos if 'download'.
 961         Returns the resolved ie_result.
 962         """
 963         result_type = ie_result.get('_type', 'video')
 964
 965         if result_type in ('url', 'url_transparent'):
 966             ie_result['url'] = sanitize_url(ie_result['url'])
 967             extract_flat = self.params.get('extract_flat', False)
 968             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
 969                     or extract_flat is True):
 970                 self.__forced_printings(
 971                     ie_result, self.prepare_filename(ie_result),
 972                     incomplete=True)
 973                 return ie_result
 974
 975         if result_type == 'video':
 976             self.add_extra_info(ie_result, extra_info)
 977             return self.process_video_result(ie_result, download=download)
 978         elif result_type == 'url':
 979             # We have to add extra_info to the results because it may be
 980             # contained in a playlist
 981             return self.extract_info(ie_result['url'],
 982                                      download, info_dict=ie_result,
 983                                      ie_key=ie_result.get('ie_key'),
 984                                      extra_info=extra_info)
 985         elif result_type == 'url_transparent':
 986             # Use the information from the embedding page
 987             info = self.extract_info(
 988                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 989                 extra_info=extra_info, download=False, process=False)
 990
 991             # extract_info may return None when ignoreerrors is enabled and
 992             # extraction failed with an error, don't crash and return early
 993             # in this case
 994             if not info:
 995                 return info
 996
 997             force_properties = dict(
 998                 (k, v) for k, v in ie_result.items() if v is not None)
 999             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1000                 if f in force_properties:
1001                     del force_properties[f]
1002             new_result = info.copy()
1003             new_result.update(force_properties)
1004
1005             # Extracted info may not be a video result (i.e.
1006             # info.get('_type', 'video') != video) but rather an url or
1007             # url_transparent. In such cases outer metadata (from ie_result)
1008             # should be propagated to inner one (info). For this to happen
1009             # _type of info should be overridden with url_transparent. This
1010             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1011             if new_result.get('_type') == 'url':
1012                 new_result['_type'] = 'url_transparent'
1013
1014             return self.process_ie_result(
1015                 new_result, download=download, extra_info=extra_info)
1016         elif result_type in ('playlist', 'multi_video'):
1017             # Protect from infinite recursion due to recursively nested playlists
1018             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1019             webpage_url = ie_result['webpage_url']
1020             if webpage_url in self._playlist_urls:
1021                 self.to_screen(
1022                     '[download] Skipping already downloaded playlist: %s'
1023                     % ie_result.get('title') or ie_result.get('id'))
1024                 return
1025
1026             self._playlist_level += 1
1027             self._playlist_urls.add(webpage_url)
1028             try:
1029                 return self.__process_playlist(ie_result, download)
1030             finally:
1031                 self._playlist_level -= 1
1032                 if not self._playlist_level:
1033                     self._playlist_urls.clear()
1034         elif result_type == 'compat_list':
1035             self.report_warning(
1036                 'Extractor %s returned a compat_list result. '
1037                 'It needs to be updated.' % ie_result.get('extractor'))
1038
1039             def _fixup(r):
1040                 self.add_extra_info(
1041                     r,
1042                     {
1043                         'extractor': ie_result['extractor'],
1044                         'webpage_url': ie_result['webpage_url'],
1045                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1046                         'extractor_key': ie_result['extractor_key'],
1047                     }
1048                 )
1049                 return r
1050             ie_result['entries'] = [
1051                 self.process_ie_result(_fixup(r), download, extra_info)
1052                 for r in ie_result['entries']
1053             ]
1054             return ie_result
1055         else:
1056             raise Exception('Invalid result type: %s' % result_type)
1057
    def __process_playlist(self, ie_result, download):
        """Process the selected entries of a playlist result.

        The entries to handle are chosen with the 'playlist_items' param or,
        when that is unset, the 'playliststart'/'playlistend' params, and
        may then be reversed ('playlistreverse') and/or shuffled
        ('playlistrandom'). Entries rejected by _match_entry() are skipped.

        ie_result['entries'] is replaced with the list of processed entry
        results and ie_result is returned.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # 'playliststart' is one-based; convert it to a zero-based offset
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            # Parse a comma separated list of one-based item numbers and
            # inclusive 'start-end' ranges
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        def make_playlistitems_entries(list_ie_entries):
            # Pick the requested items, silently dropping those whose index
            # falls outside the list.
            # NOTE(review): the lower bound also lets item numbers <= 0
            # through as negative indices - confirm this is intended.
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        # The entries may be a plain list, a PagedList or any other iterable
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                # Request every item as its own one-element slice
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                # Only consume as much of the iterable as the highest
                # requested item needs
                entries = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # NOTE(review): 'playlist_index' is derived from the position in
            # the (possibly filtered, reversed or shuffled) entries list, so
            # it looks like it can differ from the entry's original position
            # in the playlist - verify against the expected semantics.
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # A non-None value is the reason why the entry has to be skipped
            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # TODO: skip failed (empty) entries?
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
1166
    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        """Process a single playlist entry with process_ie_result().

        The decorator applies the shared extraction error handling to the
        call.
        """
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)
1171
1172     def _build_format_filter(self, filter_spec):
1173         " Returns a function to filter the formats according to the filter_spec "
1174
1175         OPERATORS = {
1176             '<': operator.lt,
1177             '<=': operator.le,
1178             '>': operator.gt,
1179             '>=': operator.ge,
1180             '=': operator.eq,
1181             '!=': operator.ne,
1182         }
1183         operator_rex = re.compile(r'''(?x)\s*
1184             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1185             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1186             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1187             $
1188             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1189         m = operator_rex.search(filter_spec)
1190         if m:
1191             try:
1192                 comparison_value = int(m.group('value'))
1193             except ValueError:
1194                 comparison_value = parse_filesize(m.group('value'))
1195                 if comparison_value is None:
1196                     comparison_value = parse_filesize(m.group('value') + 'B')
1197                 if comparison_value is None:
1198                     raise ValueError(
1199                         'Invalid value %r in format specification %r' % (
1200                             m.group('value'), filter_spec))
1201             op = OPERATORS[m.group('op')]
1202
1203         if not m:
1204             STR_OPERATORS = {
1205                 '=': operator.eq,
1206                 '^=': lambda attr, value: attr.startswith(value),
1207                 '$=': lambda attr, value: attr.endswith(value),
1208                 '*=': lambda attr, value: value in attr,
1209             }
1210             str_operator_rex = re.compile(r'''(?x)
1211                 \s*(?P<key>[a-zA-Z0-9._-]+)
1212                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1213                 \s*(?P<value>[a-zA-Z0-9._-]+)
1214                 \s*$
1215                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1216             m = str_operator_rex.search(filter_spec)
1217             if m:
1218                 comparison_value = m.group('value')
1219                 str_op = STR_OPERATORS[m.group('op')]
1220                 if m.group('negation'):
1221                     op = lambda attr, value: not str_op(attr, value)
1222                 else:
1223                     op = str_op
1224
1225         if not m:
1226             raise ValueError('Invalid filter specification %r' % filter_spec)
1227
1228         def _filter(f):
1229             actual_value = f.get(m.group('key'))
1230             if actual_value is None:
1231                 return m.group('none_inclusive')
1232             return op(actual_value, comparison_value)
1233         return _filter
1234
1235     def _default_format_spec(self, info_dict, download=True):
1236
1237         def can_merge():
1238             merger = FFmpegMergerPP(self)
1239             return merger.available and merger.can_merge()
1240
1241         prefer_best = (
1242             not self.params.get('simulate', False)
1243             and download
1244             and (
1245                 not can_merge()
1246                 or info_dict.get('is_live', False)
1247                 or self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-'))
1248
1249         return (
1250             'best/bestvideo+bestaudio'
1251             if prefer_best
1252             else 'bestvideo*+bestaudio/best'
1253             if not self.params.get('allow_multiple_audio_streams', False)
1254             else 'bestvideo+bestaudio/best')
1255
1256     def build_format_selector(self, format_spec):
1257         def syntax_error(note, start):
1258             message = (
1259                 'Invalid format specification: '
1260                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1261             return SyntaxError(message)
1262
1263         PICKFIRST = 'PICKFIRST'
1264         MERGE = 'MERGE'
1265         SINGLE = 'SINGLE'
1266         GROUP = 'GROUP'
1267         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1268
1269         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1270                                   'video': self.params.get('allow_multiple_video_streams', False)}
1271
1272         def _parse_filter(tokens):
1273             filter_parts = []
1274             for type, string, start, _, _ in tokens:
1275                 if type == tokenize.OP and string == ']':
1276                     return ''.join(filter_parts)
1277                 else:
1278                     filter_parts.append(string)
1279
1280         def _remove_unused_ops(tokens):
1281             # Remove operators that we don't use and join them with the surrounding strings
1282             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1283             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1284             last_string, last_start, last_end, last_line = None, None, None, None
1285             for type, string, start, end, line in tokens:
1286                 if type == tokenize.OP and string == '[':
1287                     if last_string:
1288                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1289                         last_string = None
1290                     yield type, string, start, end, line
1291                     # everything inside brackets will be handled by _parse_filter
1292                     for type, string, start, end, line in tokens:
1293                         yield type, string, start, end, line
1294                         if type == tokenize.OP and string == ']':
1295                             break
1296                 elif type == tokenize.OP and string in ALLOWED_OPS:
1297                     if last_string:
1298                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1299                         last_string = None
1300                     yield type, string, start, end, line
1301                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1302                     if not last_string:
1303                         last_string = string
1304                         last_start = start
1305                         last_end = end
1306                     else:
1307                         last_string += string
1308             if last_string:
1309                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1310
1311         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1312             selectors = []
1313             current_selector = None
1314             for type, string, start, _, _ in tokens:
1315                 # ENCODING is only defined in python 3.x
1316                 if type == getattr(tokenize, 'ENCODING', None):
1317                     continue
1318                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1319                     current_selector = FormatSelector(SINGLE, string, [])
1320                 elif type == tokenize.OP:
1321                     if string == ')':
1322                         if not inside_group:
1323                             # ')' will be handled by the parentheses group
1324                             tokens.restore_last_token()
1325                         break
1326                     elif inside_merge and string in ['/', ',']:
1327                         tokens.restore_last_token()
1328                         break
1329                     elif inside_choice and string == ',':
1330                         tokens.restore_last_token()
1331                         break
1332                     elif string == ',':
1333                         if not current_selector:
1334                             raise syntax_error('"," must follow a format selector', start)
1335                         selectors.append(current_selector)
1336                         current_selector = None
1337                     elif string == '/':
1338                         if not current_selector:
1339                             raise syntax_error('"/" must follow a format selector', start)
1340                         first_choice = current_selector
1341                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1342                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1343                     elif string == '[':
1344                         if not current_selector:
1345                             current_selector = FormatSelector(SINGLE, 'best', [])
1346                         format_filter = _parse_filter(tokens)
1347                         current_selector.filters.append(format_filter)
1348                     elif string == '(':
1349                         if current_selector:
1350                             raise syntax_error('Unexpected "("', start)
1351                         group = _parse_format_selection(tokens, inside_group=True)
1352                         current_selector = FormatSelector(GROUP, group, [])
1353                     elif string == '+':
1354                         if not current_selector:
1355                             raise syntax_error('Unexpected "+"', start)
1356                         selector_1 = current_selector
1357                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1358                         if not selector_2:
1359                             raise syntax_error('Expected a selector', start)
1360                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1361                     else:
1362                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1363                 elif type == tokenize.ENDMARKER:
1364                     break
1365             if current_selector:
1366                 selectors.append(current_selector)
1367             return selectors
1368
1369         def _build_selector_function(selector):
1370             if isinstance(selector, list):  # ,
1371                 fs = [_build_selector_function(s) for s in selector]
1372
1373                 def selector_function(ctx):
1374                     for f in fs:
1375                         for format in f(ctx):
1376                             yield format
1377                 return selector_function
1378
1379             elif selector.type == GROUP:  # ()
1380                 selector_function = _build_selector_function(selector.selector)
1381
1382             elif selector.type == PICKFIRST:  # /
1383                 fs = [_build_selector_function(s) for s in selector.selector]
1384
1385                 def selector_function(ctx):
1386                     for f in fs:
1387                         picked_formats = list(f(ctx))
1388                         if picked_formats:
1389                             return picked_formats
1390                     return []
1391
1392             elif selector.type == SINGLE:  # atom
1393                 format_spec = selector.selector if selector.selector is not None else 'best'
1394
1395                 if format_spec == 'all':
1396                     def selector_function(ctx):
1397                         formats = list(ctx['formats'])
1398                         if formats:
1399                             for f in formats:
1400                                 yield f
1401
1402                 else:
1403                     format_fallback = False
1404                     format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
1405                     if format_spec_obj is not None:
1406                         format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
1407                         format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
1408                         not_format_type = 'v' if format_type == 'a' else 'a'
1409                         format_modified = format_spec_obj.group(3) is not None
1410
1411                         format_fallback = not format_type and not format_modified  # for b, w
1412                         filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
1413                                     if format_type and format_modified  # bv*, ba*, wv*, wa*
1414                                     else (lambda f: f.get(not_format_type + 'codec') == 'none')
1415                                     if format_type  # bv, ba, wv, wa
1416                                     else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1417                                     if not format_modified  # b, w
1418                                     else None)  # b*, w*
1419                     else:
1420                         format_idx = -1
1421                         filter_f = ((lambda f: f.get('ext') == format_spec)
1422                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1423                                     else (lambda f: f.get('format_id') == format_spec))  # id
1424
1425                     def selector_function(ctx):
1426                         formats = list(ctx['formats'])
1427                         if not formats:
1428                             return
1429                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1430                         if matches:
1431                             yield matches[format_idx]
1432                         elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
1433                             # for extractors with incomplete formats (audio only (soundcloud)
1434                             # or video only (imgur)) best/worst will fallback to
1435                             # best/worst {video,audio}-only format
1436                             yield formats[format_idx]
1437
1438             elif selector.type == MERGE:        # +
1439                 def _merge(formats_pair):
1440                     format_1, format_2 = formats_pair
1441
1442                     formats_info = []
1443                     formats_info.extend(format_1.get('requested_formats', (format_1,)))
1444                     formats_info.extend(format_2.get('requested_formats', (format_2,)))
1445
1446                     if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1447                         get_no_more = {"video": False, "audio": False}
1448                         for (i, fmt_info) in enumerate(formats_info):
1449                             for aud_vid in ["audio", "video"]:
1450                                 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1451                                     if get_no_more[aud_vid]:
1452                                         formats_info.pop(i)
1453                                     get_no_more[aud_vid] = True
1454
1455                     if len(formats_info) == 1:
1456                         return formats_info[0]
1457
1458                     video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1459                     audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1460
1461                     the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1462                     the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1463
1464                     output_ext = self.params.get('merge_output_format')
1465                     if not output_ext:
1466                         if the_only_video:
1467                             output_ext = the_only_video['ext']
1468                         elif the_only_audio and not video_fmts:
1469                             output_ext = the_only_audio['ext']
1470                         else:
1471                             output_ext = 'mkv'
1472
1473                     new_dict = {
1474                         'requested_formats': formats_info,
1475                         'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1476                         'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1477                         'ext': output_ext,
1478                     }
1479
1480                     if the_only_video:
1481                         new_dict.update({
1482                             'width': the_only_video.get('width'),
1483                             'height': the_only_video.get('height'),
1484                             'resolution': the_only_video.get('resolution'),
1485                             'fps': the_only_video.get('fps'),
1486                             'vcodec': the_only_video.get('vcodec'),
1487                             'vbr': the_only_video.get('vbr'),
1488                             'stretched_ratio': the_only_video.get('stretched_ratio'),
1489                         })
1490
1491                     if the_only_audio:
1492                         new_dict.update({
1493                             'acodec': the_only_audio.get('acodec'),
1494                             'abr': the_only_audio.get('abr'),
1495                         })
1496
1497                     return new_dict
1498
1499                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1500
1501                 def selector_function(ctx):
1502                     for pair in itertools.product(
1503                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1504                         yield _merge(pair)
1505
1506             filters = [self._build_format_filter(f) for f in selector.filters]
1507
1508             def final_selector(ctx):
1509                 ctx_copy = copy.deepcopy(ctx)
1510                 for _filter in filters:
1511                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1512                 return selector_function(ctx_copy)
1513             return final_selector
1514
1515         stream = io.BytesIO(format_spec.encode('utf-8'))
1516         try:
1517             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1518         except tokenize.TokenError:
1519             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1520
1521         class TokenIterator(object):
1522             def __init__(self, tokens):
1523                 self.tokens = tokens
1524                 self.counter = 0
1525
1526             def __iter__(self):
1527                 return self
1528
1529             def __next__(self):
1530                 if self.counter >= len(self.tokens):
1531                     raise StopIteration()
1532                 value = self.tokens[self.counter]
1533                 self.counter += 1
1534                 return value
1535
1536             next = __next__
1537
1538             def restore_last_token(self):
1539                 self.counter -= 1
1540
1541         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1542         return _build_selector_function(parsed_selector)
1543
1544     def _calc_headers(self, info_dict):
1545         res = std_headers.copy()
1546
1547         add_headers = info_dict.get('http_headers')
1548         if add_headers:
1549             res.update(add_headers)
1550
1551         cookies = self._calc_cookies(info_dict)
1552         if cookies:
1553             res['Cookie'] = cookies
1554
1555         if 'X-Forwarded-For' not in res:
1556             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1557             if x_forwarded_for_ip:
1558                 res['X-Forwarded-For'] = x_forwarded_for_ip
1559
1560         return res
1561
1562     def _calc_cookies(self, info_dict):
1563         pr = sanitized_Request(info_dict['url'])
1564         self.cookiejar.add_cookie_header(pr)
1565         return pr.get_header('Cookie')
1566
1567     def process_video_result(self, info_dict, download=True):
1568         assert info_dict.get('_type', 'video') == 'video'
1569
1570         if 'id' not in info_dict:
1571             raise ExtractorError('Missing "id" field in extractor result')
1572         if 'title' not in info_dict:
1573             raise ExtractorError('Missing "title" field in extractor result')
1574
1575         def report_force_conversion(field, field_not, conversion):
1576             self.report_warning(
1577                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1578                 % (field, field_not, conversion))
1579
1580         def sanitize_string_field(info, string_field):
1581             field = info.get(string_field)
1582             if field is None or isinstance(field, compat_str):
1583                 return
1584             report_force_conversion(string_field, 'a string', 'string')
1585             info[string_field] = compat_str(field)
1586
1587         def sanitize_numeric_fields(info):
1588             for numeric_field in self._NUMERIC_FIELDS:
1589                 field = info.get(numeric_field)
1590                 if field is None or isinstance(field, compat_numeric_types):
1591                     continue
1592                 report_force_conversion(numeric_field, 'numeric', 'int')
1593                 info[numeric_field] = int_or_none(field)
1594
1595         sanitize_string_field(info_dict, 'id')
1596         sanitize_numeric_fields(info_dict)
1597
1598         if 'playlist' not in info_dict:
1599             # It isn't part of a playlist
1600             info_dict['playlist'] = None
1601             info_dict['playlist_index'] = None
1602
1603         thumbnails = info_dict.get('thumbnails')
1604         if thumbnails is None:
1605             thumbnail = info_dict.get('thumbnail')
1606             if thumbnail:
1607                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1608         if thumbnails:
1609             thumbnails.sort(key=lambda t: (
1610                 t.get('preference') if t.get('preference') is not None else -1,
1611                 t.get('width') if t.get('width') is not None else -1,
1612                 t.get('height') if t.get('height') is not None else -1,
1613                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1614             for i, t in enumerate(thumbnails):
1615                 t['url'] = sanitize_url(t['url'])
1616                 if t.get('width') and t.get('height'):
1617                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1618                 if t.get('id') is None:
1619                     t['id'] = '%d' % i
1620
1621         if self.params.get('list_thumbnails'):
1622             self.list_thumbnails(info_dict)
1623             return
1624
1625         thumbnail = info_dict.get('thumbnail')
1626         if thumbnail:
1627             info_dict['thumbnail'] = sanitize_url(thumbnail)
1628         elif thumbnails:
1629             info_dict['thumbnail'] = thumbnails[-1]['url']
1630
1631         if 'display_id' not in info_dict and 'id' in info_dict:
1632             info_dict['display_id'] = info_dict['id']
1633
1634         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1635             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1636             # see http://bugs.python.org/issue1646728)
1637             try:
1638                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1639                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1640             except (ValueError, OverflowError, OSError):
1641                 pass
1642
1643         # Auto generate title fields corresponding to the *_number fields when missing
1644         # in order to always have clean titles. This is very common for TV series.
1645         for field in ('chapter', 'season', 'episode'):
1646             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1647                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1648
1649         for cc_kind in ('subtitles', 'automatic_captions'):
1650             cc = info_dict.get(cc_kind)
1651             if cc:
1652                 for _, subtitle in cc.items():
1653                     for subtitle_format in subtitle:
1654                         if subtitle_format.get('url'):
1655                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1656                         if subtitle_format.get('ext') is None:
1657                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1658
1659         automatic_captions = info_dict.get('automatic_captions')
1660         subtitles = info_dict.get('subtitles')
1661
1662         if self.params.get('listsubtitles', False):
1663             if 'automatic_captions' in info_dict:
1664                 self.list_subtitles(
1665                     info_dict['id'], automatic_captions, 'automatic captions')
1666             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1667             return
1668
1669         info_dict['requested_subtitles'] = self.process_subtitles(
1670             info_dict['id'], subtitles, automatic_captions)
1671
1672         # We now pick which formats have to be downloaded
1673         if info_dict.get('formats') is None:
1674             # There's only one format available
1675             formats = [info_dict]
1676         else:
1677             formats = info_dict['formats']
1678
1679         if not formats:
1680             raise ExtractorError('No video formats found!')
1681
1682         def is_wellformed(f):
1683             url = f.get('url')
1684             if not url:
1685                 self.report_warning(
1686                     '"url" field is missing or empty - skipping format, '
1687                     'there is an error in extractor')
1688                 return False
1689             if isinstance(url, bytes):
1690                 sanitize_string_field(f, 'url')
1691             return True
1692
1693         # Filter out malformed formats for better extraction robustness
1694         formats = list(filter(is_wellformed, formats))
1695
1696         formats_dict = {}
1697
1698         # We check that all the formats have the format and format_id fields
1699         for i, format in enumerate(formats):
1700             sanitize_string_field(format, 'format_id')
1701             sanitize_numeric_fields(format)
1702             format['url'] = sanitize_url(format['url'])
1703             if not format.get('format_id'):
1704                 format['format_id'] = compat_str(i)
1705             else:
1706                 # Sanitize format_id from characters used in format selector expression
1707                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1708             format_id = format['format_id']
1709             if format_id not in formats_dict:
1710                 formats_dict[format_id] = []
1711             formats_dict[format_id].append(format)
1712
1713         # Make sure all formats have unique format_id
1714         for format_id, ambiguous_formats in formats_dict.items():
1715             if len(ambiguous_formats) > 1:
1716                 for i, format in enumerate(ambiguous_formats):
1717                     format['format_id'] = '%s-%d' % (format_id, i)
1718
1719         for i, format in enumerate(formats):
1720             if format.get('format') is None:
1721                 format['format'] = '{id} - {res}{note}'.format(
1722                     id=format['format_id'],
1723                     res=self.format_resolution(format),
1724                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1725                 )
1726             # Automatically determine file extension if missing
1727             if format.get('ext') is None:
1728                 format['ext'] = determine_ext(format['url']).lower()
1729             # Automatically determine protocol if missing (useful for format
1730             # selection purposes)
1731             if format.get('protocol') is None:
1732                 format['protocol'] = determine_protocol(format)
1733             # Add HTTP headers, so that external programs can use them from the
1734             # json output
1735             full_format_info = info_dict.copy()
1736             full_format_info.update(format)
1737             format['http_headers'] = self._calc_headers(full_format_info)
1738         # Remove private housekeeping stuff
1739         if '__x_forwarded_for_ip' in info_dict:
1740             del info_dict['__x_forwarded_for_ip']
1741
1742         # TODO Central sorting goes here
1743
1744         if formats[0] is not info_dict:
1745             # only set the 'formats' fields if the original info_dict list them
1746             # otherwise we end up with a circular reference, the first (and unique)
1747             # element in the 'formats' field in info_dict is info_dict itself,
1748             # which can't be exported to json
1749             info_dict['formats'] = formats
1750         if self.params.get('listformats'):
1751             self.list_formats(info_dict)
1752             return
1753
1754         req_format = self.params.get('format')
1755         if req_format is None:
1756             req_format = self._default_format_spec(info_dict, download=download)
1757             if self.params.get('verbose'):
1758                 self._write_string('[debug] Default format spec: %s\n' % req_format)
1759
1760         format_selector = self.build_format_selector(req_format)
1761
1762         # While in format selection we may need to have an access to the original
1763         # format set in order to calculate some metrics or do some processing.
1764         # For now we need to be able to guess whether original formats provided
1765         # by extractor are incomplete or not (i.e. whether extractor provides only
1766         # video-only or audio-only formats) for proper formats selection for
1767         # extractors with such incomplete formats (see
1768         # https://github.com/ytdl-org/youtube-dl/pull/5556).
1769         # Since formats may be filtered during format selection and may not match
1770         # the original formats the results may be incorrect. Thus original formats
1771         # or pre-calculated metrics should be passed to format selection routines
1772         # as well.
1773         # We will pass a context object containing all necessary additional data
1774         # instead of just formats.
1775         # This fixes incorrect format selection issue (see
1776         # https://github.com/ytdl-org/youtube-dl/issues/10083).
1777         incomplete_formats = (
1778             # All formats are video-only or
1779             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1780             # all formats are audio-only
1781             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1782
1783         ctx = {
1784             'formats': formats,
1785             'incomplete_formats': incomplete_formats,
1786         }
1787
1788         formats_to_download = list(format_selector(ctx))
1789         if not formats_to_download:
1790             raise ExtractorError('requested format not available',
1791                                  expected=True)
1792
1793         if download:
1794             self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
1795             if len(formats_to_download) > 1:
1796                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1797             for format in formats_to_download:
1798                 new_info = dict(info_dict)
1799                 new_info.update(format)
1800                 self.process_info(new_info)
1801         # We update the info dict with the best quality format (backwards compatibility)
1802         info_dict.update(formats_to_download[-1])
1803         return info_dict
1804
1805     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1806         """Select the requested subtitles and their format"""
1807         available_subs = {}
1808         if normal_subtitles and self.params.get('writesubtitles'):
1809             available_subs.update(normal_subtitles)
1810         if automatic_captions and self.params.get('writeautomaticsub'):
1811             for lang, cap_info in automatic_captions.items():
1812                 if lang not in available_subs:
1813                     available_subs[lang] = cap_info
1814
1815         if (not self.params.get('writesubtitles') and not
1816                 self.params.get('writeautomaticsub') or not
1817                 available_subs):
1818             return None
1819
1820         if self.params.get('allsubtitles', False):
1821             requested_langs = available_subs.keys()
1822         else:
1823             if self.params.get('subtitleslangs', False):
1824                 requested_langs = self.params.get('subtitleslangs')
1825             elif 'en' in available_subs:
1826                 requested_langs = ['en']
1827             else:
1828                 requested_langs = [list(available_subs.keys())[0]]
1829
1830         formats_query = self.params.get('subtitlesformat', 'best')
1831         formats_preference = formats_query.split('/') if formats_query else []
1832         subs = {}
1833         for lang in requested_langs:
1834             formats = available_subs.get(lang)
1835             if formats is None:
1836                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1837                 continue
1838             for ext in formats_preference:
1839                 if ext == 'best':
1840                     f = formats[-1]
1841                     break
1842                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1843                 if matches:
1844                     f = matches[-1]
1845                     break
1846             else:
1847                 f = formats[-1]
1848                 self.report_warning(
1849                     'No subtitle format found matching "%s" for language %s, '
1850                     'using %s' % (formats_query, lang, f['ext']))
1851             subs[lang] = f
1852         return subs
1853
1854     def __forced_printings(self, info_dict, filename, incomplete):
1855         def print_mandatory(field):
1856             if (self.params.get('force%s' % field, False)
1857                     and (not incomplete or info_dict.get(field) is not None)):
1858                 self.to_stdout(info_dict[field])
1859
1860         def print_optional(field):
1861             if (self.params.get('force%s' % field, False)
1862                     and info_dict.get(field) is not None):
1863                 self.to_stdout(info_dict[field])
1864
1865         print_mandatory('title')
1866         print_mandatory('id')
1867         if self.params.get('forceurl', False) and not incomplete:
1868             if info_dict.get('requested_formats') is not None:
1869                 for f in info_dict['requested_formats']:
1870                     self.to_stdout(f['url'] + f.get('play_path', ''))
1871             else:
1872                 # For RTMP URLs, also include the playpath
1873                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1874         print_optional('thumbnail')
1875         print_optional('description')
1876         if self.params.get('forcefilename', False) and filename is not None:
1877             self.to_stdout(filename)
1878         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1879             self.to_stdout(formatSeconds(info_dict['duration']))
1880         print_mandatory('format')
1881         if self.params.get('forcejson', False):
1882             self.to_stdout(json.dumps(info_dict))
1883
1884     def process_info(self, info_dict):
1885         """Process a single resolved IE result."""
1886
1887         assert info_dict.get('_type', 'video') == 'video'
1888
1889         max_downloads = self.params.get('max_downloads')
1890         if max_downloads is not None:
1891             if self._num_downloads >= int(max_downloads):
1892                 raise MaxDownloadsReached()
1893
1894         # TODO: backward compatibility, to be removed
1895         info_dict['fulltitle'] = info_dict['title']
1896
1897         if 'format' not in info_dict:
1898             info_dict['format'] = info_dict['ext']
1899
1900         if self._match_entry(info_dict, incomplete=False) is not None:
1901             return
1902
1903         self._num_downloads += 1
1904
1905         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1906
1907         # Forced printings
1908         self.__forced_printings(info_dict, filename, incomplete=False)
1909
1910         if self.params.get('simulate', False):
1911             if self.params.get('force_write_download_archive', False):
1912                 self.record_download_archive(info_dict)
1913
1914             # Do nothing else if in simulate mode
1915             return
1916
1917         if filename is None:
1918             return
1919
1920         def ensure_dir_exists(path):
1921             try:
1922                 dn = os.path.dirname(path)
1923                 if dn and not os.path.exists(dn):
1924                     os.makedirs(dn)
1925                 return True
1926             except (OSError, IOError) as err:
1927                 self.report_error('unable to create directory ' + error_to_compat_str(err))
1928                 return False
1929
1930         if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
1931             return
1932
1933         if self.params.get('writedescription', False):
1934             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1935             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1936                 self.to_screen('[info] Video description is already present')
1937             elif info_dict.get('description') is None:
1938                 self.report_warning('There\'s no description to write.')
1939             else:
1940                 try:
1941                     self.to_screen('[info] Writing video description to: ' + descfn)
1942                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1943                         descfile.write(info_dict['description'])
1944                 except (OSError, IOError):
1945                     self.report_error('Cannot write description file ' + descfn)
1946                     return
1947
1948         if self.params.get('writeannotations', False):
1949             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1950             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
1951                 self.to_screen('[info] Video annotations are already present')
1952             elif not info_dict.get('annotations'):
1953                 self.report_warning('There are no annotations to write.')
1954             else:
1955                 try:
1956                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1957                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1958                         annofile.write(info_dict['annotations'])
1959                 except (KeyError, TypeError):
1960                     self.report_warning('There are no annotations to write.')
1961                 except (OSError, IOError):
1962                     self.report_error('Cannot write annotations file: ' + annofn)
1963                     return
1964
1965         def dl(name, info, subtitle=False):
1966             fd = get_suitable_downloader(info, self.params)(self, self.params)
1967             for ph in self._progress_hooks:
1968                 fd.add_progress_hook(ph)
1969             if self.params.get('verbose'):
1970                 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
1971             return fd.download(name, info, subtitle)
1972
1973         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1974                                        self.params.get('writeautomaticsub')])
1975
1976         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1977             # subtitles download errors are already managed as troubles in relevant IE
1978             # that way it will silently go on when used with unsupporting IE
1979             subtitles = info_dict['requested_subtitles']
1980             # ie = self.get_info_extractor(info_dict['extractor_key'])
1981             for sub_lang, sub_info in subtitles.items():
1982                 sub_format = sub_info['ext']
1983                 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
1984                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
1985                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
1986                 else:
1987                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1988                     if sub_info.get('data') is not None:
1989                         try:
1990                             # Use newline='' to prevent conversion of newline characters
1991                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
1992                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1993                                 subfile.write(sub_info['data'])
1994                         except (OSError, IOError):
1995                             self.report_error('Cannot write subtitles file ' + sub_filename)
1996                             return
1997                     else:
1998                         try:
1999                             dl(sub_filename, sub_info, subtitle=True)
2000                             '''
2001                             if self.params.get('sleep_interval_subtitles', False):
2002                                 dl(sub_filename, sub_info)
2003                             else:
2004                                 sub_data = ie._request_webpage(
2005                                     sub_info['url'], info_dict['id'], note=False).read()
2006                                 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
2007                                     subfile.write(sub_data)
2008                             '''
2009                         except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2010                             self.report_warning('Unable to download subtitle for "%s": %s' %
2011                                                 (sub_lang, error_to_compat_str(err)))
2012                             continue
2013
2014         if self.params.get('skip_download', False):
2015             if self.params.get('convertsubtitles', False):
2016                 subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
2017                 filename_real_ext = os.path.splitext(filename)[1][1:]
2018                 filename_wo_ext = (
2019                     os.path.splitext(filename)[0]
2020                     if filename_real_ext == info_dict['ext']
2021                     else filename)
2022                 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
2023                 if subconv.available:
2024                     info_dict.setdefault('__postprocessors', [])
2025                     # info_dict['__postprocessors'].append(subconv)
2026                 if os.path.exists(encodeFilename(afilename)):
2027                     self.to_screen(
2028                         '[download] %s has already been downloaded and '
2029                         'converted' % afilename)
2030                 else:
2031                     try:
2032                         self.post_process(filename, info_dict)
2033                     except (PostProcessingError) as err:
2034                         self.report_error('postprocessing: %s' % str(err))
2035                         return
2036
2037         if self.params.get('writeinfojson', False):
2038             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
2039             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2040                 self.to_screen('[info] Video description metadata is already present')
2041             else:
2042                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
2043                 try:
2044                     write_json_file(self.filter_requested_info(info_dict), infofn)
2045                 except (OSError, IOError):
2046                     self.report_error('Cannot write metadata to JSON file ' + infofn)
2047                     return
2048
2049         self._write_thumbnails(info_dict, filename)
2050
2051         # Write internet shortcut files
2052         url_link = webloc_link = desktop_link = False
2053         if self.params.get('writelink', False):
2054             if sys.platform == "darwin":  # macOS.
2055                 webloc_link = True
2056             elif sys.platform.startswith("linux"):
2057                 desktop_link = True
2058             else:  # if sys.platform in ['win32', 'cygwin']:
2059                 url_link = True
2060         if self.params.get('writeurllink', False):
2061             url_link = True
2062         if self.params.get('writewebloclink', False):
2063             webloc_link = True
2064         if self.params.get('writedesktoplink', False):
2065             desktop_link = True
2066
2067         if url_link or webloc_link or desktop_link:
2068             if 'webpage_url' not in info_dict:
2069                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2070                 return
2071             ascii_url = iri_to_uri(info_dict['webpage_url'])
2072
2073         def _write_link_file(extension, template, newline, embed_filename):
2074             linkfn = replace_extension(filename, extension, info_dict.get('ext'))
2075             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(linkfn)):
2076                 self.to_screen('[info] Internet shortcut is already present')
2077             else:
2078                 try:
2079                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2080                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2081                         template_vars = {'url': ascii_url}
2082                         if embed_filename:
2083                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2084                         linkfile.write(template % template_vars)
2085                 except (OSError, IOError):
2086                     self.report_error('Cannot write internet shortcut ' + linkfn)
2087                     return False
2088             return True
2089
2090         if url_link:
2091             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2092                 return
2093         if webloc_link:
2094             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2095                 return
2096         if desktop_link:
2097             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2098                 return
2099
2100         # Download
2101         must_record_download_archive = False
2102         if not self.params.get('skip_download', False):
2103             try:
2104                 if info_dict.get('requested_formats') is not None:
2105                     downloaded = []
2106                     success = True
2107                     merger = FFmpegMergerPP(self)
2108                     if not merger.available:
2109                         postprocessors = []
2110                         self.report_warning('You have requested multiple '
2111                                             'formats but ffmpeg or avconv are not installed.'
2112                                             ' The formats won\'t be merged.')
2113                     else:
2114                         postprocessors = [merger]
2115
2116                     def compatible_formats(formats):
2117                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2118                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2119                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2120                         if len(video_formats) > 2 or len(audio_formats) > 2:
2121                             return False
2122
2123                         # Check extension
2124                         exts = set(format.get('ext') for format in formats)
2125                         COMPATIBLE_EXTS = (
2126                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2127                             set(('webm',)),
2128                         )
2129                         for ext_sets in COMPATIBLE_EXTS:
2130                             if ext_sets.issuperset(exts):
2131                                 return True
2132                         # TODO: Check acodec/vcodec
2133                         return False
2134
2135                     filename_real_ext = os.path.splitext(filename)[1][1:]
2136                     filename_wo_ext = (
2137                         os.path.splitext(filename)[0]
2138                         if filename_real_ext == info_dict['ext']
2139                         else filename)
2140                     requested_formats = info_dict['requested_formats']
2141                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2142                         info_dict['ext'] = 'mkv'
2143                         self.report_warning(
2144                             'Requested formats are incompatible for merge and will be merged into mkv.')
2145                     # Ensure filename always has a correct extension for successful merge
2146                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
2147                     file_exists = os.path.exists(encodeFilename(filename))
2148                     if not self.params.get('overwrites', False) and file_exists:
2149                         self.to_screen(
2150                             '[download] %s has already been downloaded and '
2151                             'merged' % filename)
2152                     else:
2153                         if file_exists:
2154                             self.report_file_delete(filename)
2155                             os.remove(encodeFilename(filename))
2156                         for f in requested_formats:
2157                             new_info = dict(info_dict)
2158                             new_info.update(f)
2159                             fname = prepend_extension(
2160                                 self.prepare_filename(new_info),
2161                                 'f%s' % f['format_id'], new_info['ext'])
2162                             if not ensure_dir_exists(fname):
2163                                 return
2164                             downloaded.append(fname)
2165                             partial_success, real_download = dl(fname, new_info)
2166                             success = success and partial_success
2167                         info_dict['__postprocessors'] = postprocessors
2168                         info_dict['__files_to_merge'] = downloaded
2169                         # Even if there were no downloads, it is being merged only now
2170                         info_dict['__real_download'] = True
2171                 else:
2172                     # Delete existing file with --yes-overwrites
2173                     if self.params.get('overwrites', False):
2174                         if os.path.exists(encodeFilename(filename)):
2175                             self.report_file_delete(filename)
2176                             os.remove(encodeFilename(filename))
2177                     # Just a single file
2178                     success, real_download = dl(filename, info_dict)
2179                     info_dict['__real_download'] = real_download
2180             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2181                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2182                 return
2183             except (OSError, IOError) as err:
2184                 raise UnavailableVideoError(err)
2185             except (ContentTooShortError, ) as err:
2186                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2187                 return
2188
2189             if success and filename != '-':
2190                 # Fixup content
2191                 fixup_policy = self.params.get('fixup')
2192                 if fixup_policy is None:
2193                     fixup_policy = 'detect_or_warn'
2194
2195                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
2196
2197                 stretched_ratio = info_dict.get('stretched_ratio')
2198                 if stretched_ratio is not None and stretched_ratio != 1:
2199                     if fixup_policy == 'warn':
2200                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2201                             info_dict['id'], stretched_ratio))
2202                     elif fixup_policy == 'detect_or_warn':
2203                         stretched_pp = FFmpegFixupStretchedPP(self)
2204                         if stretched_pp.available:
2205                             info_dict.setdefault('__postprocessors', [])
2206                             info_dict['__postprocessors'].append(stretched_pp)
2207                         else:
2208                             self.report_warning(
2209                                 '%s: Non-uniform pixel ratio (%s). %s'
2210                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2211                     else:
2212                         assert fixup_policy in ('ignore', 'never')
2213
2214                 if (info_dict.get('requested_formats') is None
2215                         and info_dict.get('container') == 'm4a_dash'):
2216                     if fixup_policy == 'warn':
2217                         self.report_warning(
2218                             '%s: writing DASH m4a. '
2219                             'Only some players support this container.'
2220                             % info_dict['id'])
2221                     elif fixup_policy == 'detect_or_warn':
2222                         fixup_pp = FFmpegFixupM4aPP(self)
2223                         if fixup_pp.available:
2224                             info_dict.setdefault('__postprocessors', [])
2225                             info_dict['__postprocessors'].append(fixup_pp)
2226                         else:
2227                             self.report_warning(
2228                                 '%s: writing DASH m4a. '
2229                                 'Only some players support this container. %s'
2230                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2231                     else:
2232                         assert fixup_policy in ('ignore', 'never')
2233
2234                 if (info_dict.get('protocol') == 'm3u8_native'
2235                         or info_dict.get('protocol') == 'm3u8'
2236                         and self.params.get('hls_prefer_native')):
2237                     if fixup_policy == 'warn':
2238                         self.report_warning('%s: malformed AAC bitstream detected.' % (
2239                             info_dict['id']))
2240                     elif fixup_policy == 'detect_or_warn':
2241                         fixup_pp = FFmpegFixupM3u8PP(self)
2242                         if fixup_pp.available:
2243                             info_dict.setdefault('__postprocessors', [])
2244                             info_dict['__postprocessors'].append(fixup_pp)
2245                         else:
2246                             self.report_warning(
2247                                 '%s: malformed AAC bitstream detected. %s'
2248                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2249                     else:
2250                         assert fixup_policy in ('ignore', 'never')
2251
2252                 try:
2253                     self.post_process(filename, info_dict)
2254                 except (PostProcessingError) as err:
2255                     self.report_error('postprocessing: %s' % str(err))
2256                     return
2257                 try:
2258                     for ph in self._post_hooks:
2259                         ph(filename)
2260                 except Exception as err:
2261                     self.report_error('post hooks: %s' % str(err))
2262                     return
2263                 must_record_download_archive = True
2264
2265         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2266             self.record_download_archive(info_dict)
2267         max_downloads = self.params.get('max_downloads')
2268         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2269             raise MaxDownloadsReached()
2270
2271     def download(self, url_list):
2272         """Download a given list of URLs."""
2273         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2274         if (len(url_list) > 1
2275                 and outtmpl != '-'
2276                 and '%' not in outtmpl
2277                 and self.params.get('max_downloads') != 1):
2278             raise SameFileError(outtmpl)
2279
2280         for url in url_list:
2281             try:
2282                 # It also downloads the videos
2283                 res = self.extract_info(
2284                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2285             except UnavailableVideoError:
2286                 self.report_error('unable to download video')
2287             except MaxDownloadsReached:
2288                 self.to_screen('[info] Maximum number of downloaded files reached')
2289                 raise
2290             except ExistingVideoReached:
2291                 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2292                 raise
2293             except RejectedVideoReached:
2294                 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2295                 raise
2296             else:
2297                 if self.params.get('dump_single_json', False):
2298                     self.to_stdout(json.dumps(res))
2299
2300         return self._download_retcode
2301
2302     def download_with_info_file(self, info_filename):
2303         with contextlib.closing(fileinput.FileInput(
2304                 [info_filename], mode='r',
2305                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2306             # FileInput doesn't have a read method, we can't call json.load
2307             info = self.filter_requested_info(json.loads('\n'.join(f)))
2308         try:
2309             self.process_ie_result(info, download=True)
2310         except DownloadError:
2311             webpage_url = info.get('webpage_url')
2312             if webpage_url is not None:
2313                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2314                 return self.download([webpage_url])
2315             else:
2316                 raise
2317         return self._download_retcode
2318
2319     @staticmethod
2320     def filter_requested_info(info_dict):
2321         return dict(
2322             (k, v) for k, v in info_dict.items()
2323             if k not in ['requested_formats', 'requested_subtitles'])
2324
2325     def post_process(self, filename, ie_info):
2326         """Run all the postprocessors on the given file."""
2327         info = dict(ie_info)
2328         info['filepath'] = filename
2329         pps_chain = []
2330         if ie_info.get('__postprocessors') is not None:
2331             pps_chain.extend(ie_info['__postprocessors'])
2332         pps_chain.extend(self._pps)
2333         for pp in pps_chain:
2334             files_to_delete = []
2335             try:
2336                 files_to_delete, info = pp.run(info)
2337             except PostProcessingError as e:
2338                 self.report_error(e.msg)
2339             if files_to_delete and not self.params.get('keepvideo', False):
2340                 for old_filename in set(files_to_delete):
2341                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2342                     try:
2343                         os.remove(encodeFilename(old_filename))
2344                     except (IOError, OSError):
2345                         self.report_warning('Unable to remove downloaded original file')
2346
2347     def _make_archive_id(self, info_dict):
2348         video_id = info_dict.get('id')
2349         if not video_id:
2350             return
2351         # Future-proof against any change in case
2352         # and backwards compatibility with prior versions
2353         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2354         if extractor is None:
2355             url = str_or_none(info_dict.get('url'))
2356             if not url:
2357                 return
2358             # Try to find matching extractor for the URL and take its ie_key
2359             for ie in self._ies:
2360                 if ie.suitable(url):
2361                     extractor = ie.ie_key()
2362                     break
2363             else:
2364                 return
2365         return extractor.lower() + ' ' + video_id
2366
2367     def in_download_archive(self, info_dict):
2368         fn = self.params.get('download_archive')
2369         if fn is None:
2370             return False
2371
2372         vid_id = self._make_archive_id(info_dict)
2373         if not vid_id:
2374             return False  # Incomplete video information
2375
2376         return vid_id in self.archive
2377
2378     def record_download_archive(self, info_dict):
2379         fn = self.params.get('download_archive')
2380         if fn is None:
2381             return
2382         vid_id = self._make_archive_id(info_dict)
2383         assert vid_id
2384         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2385             archive_file.write(vid_id + '\n')
2386         self.archive.add(vid_id)
2387
2388     @staticmethod
2389     def format_resolution(format, default='unknown'):
2390         if format.get('vcodec') == 'none':
2391             return 'audio only'
2392         if format.get('resolution') is not None:
2393             return format['resolution']
2394         if format.get('height') is not None:
2395             if format.get('width') is not None:
2396                 res = '%sx%s' % (format['width'], format['height'])
2397             else:
2398                 res = '%sp' % format['height']
2399         elif format.get('width') is not None:
2400             res = '%dx?' % format['width']
2401         else:
2402             res = default
2403         return res
2404
2405     def _format_note(self, fdict):
2406         res = ''
2407         if fdict.get('ext') in ['f4f', 'f4m']:
2408             res += '(unsupported) '
2409         if fdict.get('language'):
2410             if res:
2411                 res += ' '
2412             res += '[%s] ' % fdict['language']
2413         if fdict.get('format_note') is not None:
2414             res += fdict['format_note'] + ' '
2415         if fdict.get('tbr') is not None:
2416             res += '%4dk ' % fdict['tbr']
2417         if fdict.get('container') is not None:
2418             if res:
2419                 res += ', '
2420             res += '%s container' % fdict['container']
2421         if (fdict.get('vcodec') is not None
2422                 and fdict.get('vcodec') != 'none'):
2423             if res:
2424                 res += ', '
2425             res += fdict['vcodec']
2426             if fdict.get('vbr') is not None:
2427                 res += '@'
2428         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2429             res += 'video@'
2430         if fdict.get('vbr') is not None:
2431             res += '%4dk' % fdict['vbr']
2432         if fdict.get('fps') is not None:
2433             if res:
2434                 res += ', '
2435             res += '%sfps' % fdict['fps']
2436         if fdict.get('acodec') is not None:
2437             if res:
2438                 res += ', '
2439             if fdict['acodec'] == 'none':
2440                 res += 'video only'
2441             else:
2442                 res += '%-5s' % fdict['acodec']
2443         elif fdict.get('abr') is not None:
2444             if res:
2445                 res += ', '
2446             res += 'audio'
2447         if fdict.get('abr') is not None:
2448             res += '@%3dk' % fdict['abr']
2449         if fdict.get('asr') is not None:
2450             res += ' (%5dHz)' % fdict['asr']
2451         if fdict.get('filesize') is not None:
2452             if res:
2453                 res += ', '
2454             res += format_bytes(fdict['filesize'])
2455         elif fdict.get('filesize_approx') is not None:
2456             if res:
2457                 res += ', '
2458             res += '~' + format_bytes(fdict['filesize_approx'])
2459         return res
2460
2461     def _format_note_table(self, f):
2462         def join_fields(*vargs):
2463             return ', '.join((val for val in vargs if val != ''))
2464
2465         return join_fields(
2466             'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2467             format_field(f, 'language', '[%s]'),
2468             format_field(f, 'format_note'),
2469             format_field(f, 'container', ignore=(None, f.get('ext'))),
2470             format_field(f, 'asr', '%5dHz'))
2471
2472     def list_formats(self, info_dict):
2473         formats = info_dict.get('formats', [info_dict])
2474         new_format = self.params.get('listformats_table', False)
2475         if new_format:
2476             table = [
2477                 [
2478                     format_field(f, 'format_id'),
2479                     format_field(f, 'ext'),
2480                     self.format_resolution(f),
2481                     format_field(f, 'fps', '%d'),
2482                     '|',
2483                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2484                     format_field(f, 'tbr', '%4dk'),
2485                     f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
2486                     '|',
2487                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
2488                     format_field(f, 'vbr', '%4dk'),
2489                     format_field(f, 'acodec', default='unknown').replace('none', ''),
2490                     format_field(f, 'abr', '%3dk'),
2491                     format_field(f, 'asr', '%5dHz'),
2492                     self._format_note_table(f)]
2493                 for f in formats
2494                 if f.get('preference') is None or f['preference'] >= -1000]
2495             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
2496                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2497         else:
2498             table = [
2499                 [
2500                     format_field(f, 'format_id'),
2501                     format_field(f, 'ext'),
2502                     self.format_resolution(f),
2503                     self._format_note(f)]
2504                 for f in formats
2505                 if f.get('preference') is None or f['preference'] >= -1000]
2506             header_line = ['format code', 'extension', 'resolution', 'note']
2507
2508         # if len(formats) > 1:
2509         #     table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2510         self.to_screen(
2511             '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2512                 header_line,
2513                 table,
2514                 delim=new_format,
2515                 extraGap=(0 if new_format else 1),
2516                 hideEmpty=new_format)))
2517
2518     def list_thumbnails(self, info_dict):
2519         thumbnails = info_dict.get('thumbnails')
2520         if not thumbnails:
2521             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2522             return
2523
2524         self.to_screen(
2525             '[info] Thumbnails for %s:' % info_dict['id'])
2526         self.to_screen(render_table(
2527             ['ID', 'width', 'height', 'URL'],
2528             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2529
2530     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2531         if not subtitles:
2532             self.to_screen('%s has no %s' % (video_id, name))
2533             return
2534         self.to_screen(
2535             'Available %s for %s:' % (name, video_id))
2536         self.to_screen(render_table(
2537             ['Language', 'formats'],
2538             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2539                 for lang, formats in subtitles.items()]))
2540
2541     def urlopen(self, req):
2542         """ Start an HTTP download """
2543         if isinstance(req, compat_basestring):
2544             req = sanitized_Request(req)
2545         return self._opener.open(req, timeout=self._socket_timeout)
2546
    def print_debug_header(self):
        """Write '[debug]' lines describing the runtime environment.

        Does nothing unless the 'verbose' param is set. Otherwise reports, in
        this order: encodings, program version, whether lazy extractor
        loading is enabled, the git HEAD (if it can be determined), Python
        version and platform, versions of external programs, and the proxy
        map of the opener. With the 'call_home' param set, the public IP
        address is additionally fetched from yt-dl.org and reported.
        """
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may lack an 'encoding' attribute; report its type then
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        # NOTE(review): this line goes through the module-level write_string
        # with encoding=None, unlike the self._write_string calls below;
        # presumably so the encoding report does not depend on the configured
        # encoding -- confirm
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] yt-dlp version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        # Best effort: report the git revision when running from a checkout.
        # Any failure (git missing, not a repository, ...) is ignored.
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            # Only print output that starts like a hexadecimal commit id
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            # sys.exc_clear() exists only on Python 2; on Python 3 the
            # resulting AttributeError is swallowed by the inner handler
            try:
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Append the PyPy version when running on PyPy
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s) - %s\n' % (
            platform.python_version(), python_implementation(),
            platform_name()))

        # Collect versions of the external programs; entries with a falsy
        # version are left out of the report
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Merge the 'proxies' mapping of every opener handler that has one
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            # NOTE(review): this return makes the version check below
            # unreachable -- presumably a deliberate way of disabling it;
            # confirm before removing either
            return
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2623
2624     def _setup_opener(self):
2625         timeout_val = self.params.get('socket_timeout')
2626         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2627
2628         opts_cookiefile = self.params.get('cookiefile')
2629         opts_proxy = self.params.get('proxy')
2630
2631         if opts_cookiefile is None:
2632             self.cookiejar = compat_cookiejar.CookieJar()
2633         else:
2634             opts_cookiefile = expand_path(opts_cookiefile)
2635             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2636             if os.access(opts_cookiefile, os.R_OK):
2637                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2638
2639         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2640         if opts_proxy is not None:
2641             if opts_proxy == '':
2642                 proxies = {}
2643             else:
2644                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2645         else:
2646             proxies = compat_urllib_request.getproxies()
2647             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2648             if 'http' in proxies and 'https' not in proxies:
2649                 proxies['https'] = proxies['http']
2650         proxy_handler = PerRequestProxyHandler(proxies)
2651
2652         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2653         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2654         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2655         redirect_handler = YoutubeDLRedirectHandler()
2656         data_handler = compat_urllib_request_DataHandler()
2657
2658         # When passing our own FileHandler instance, build_opener won't add the
2659         # default FileHandler and allows us to disable the file protocol, which
2660         # can be used for malicious purposes (see
2661         # https://github.com/ytdl-org/youtube-dl/issues/8227)
2662         file_handler = compat_urllib_request.FileHandler()
2663
2664         def file_open(*args, **kwargs):
2665             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
2666         file_handler.file_open = file_open
2667
2668         opener = compat_urllib_request.build_opener(
2669             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2670
2671         # Delete the default user-agent header, which would otherwise apply in
2672         # cases where our custom HTTP handler doesn't come into play
2673         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2674         opener.addheaders = []
2675         self._opener = opener
2676
2677     def encode(self, s):
2678         if isinstance(s, bytes):
2679             return s  # Already encoded
2680
2681         try:
2682             return s.encode(self.get_encoding())
2683         except UnicodeEncodeError as err:
2684             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2685             raise
2686
2687     def get_encoding(self):
2688         encoding = self.params.get('encoding')
2689         if encoding is None:
2690             encoding = preferredencoding()
2691         return encoding
2692
2693     def _write_thumbnails(self, info_dict, filename):
2694         if self.params.get('writethumbnail', False):
2695             thumbnails = info_dict.get('thumbnails')
2696             if thumbnails:
2697                 thumbnails = [thumbnails[-1]]
2698         elif self.params.get('write_all_thumbnails', False):
2699             thumbnails = info_dict.get('thumbnails')
2700         else:
2701             return
2702
2703         if not thumbnails:
2704             # No thumbnails present, so return immediately
2705             return
2706
2707         for t in thumbnails:
2708             thumb_ext = determine_ext(t['url'], 'jpg')
2709             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2710             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2711             t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
2712
2713             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
2714                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2715                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2716             else:
2717                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2718                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2719                 try:
2720                     uf = self.urlopen(t['url'])
2721                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2722                         shutil.copyfileobj(uf, thumbf)
2723                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2724                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2725                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2726                     self.report_warning('Unable to download thumbnail "%s": %s' %
2727                                         (t['url'], error_to_compat_str(err)))