yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import time
  24 import tokenize
  25 import traceback
  26 import random
  27
  28 from string import ascii_letters
  29 from zipimport import zipimporter
  30
  31 from .compat import (
  32     compat_basestring,
  33     compat_cookiejar,
  34     compat_get_terminal_size,
  35     compat_kwargs,
  36     compat_numeric_types,
  37     compat_os_name,
  38     compat_str,
  39     compat_tokenize_tokenize,
  40     compat_urllib_error,
  41     compat_urllib_request,
  42     compat_urllib_request_DataHandler,
  43 )
  44 from .utils import (
  45     age_restricted,
  46     args_to_str,
  47     ContentTooShortError,
  48     date_from_str,
  49     DateRange,
  50     DEFAULT_OUTTMPL,
  51     determine_ext,
  52     determine_protocol,
  53     DOT_DESKTOP_LINK_TEMPLATE,
  54     DOT_URL_LINK_TEMPLATE,
  55     DOT_WEBLOC_LINK_TEMPLATE,
  56     DownloadError,
  57     encode_compat_str,
  58     encodeFilename,
  59     EntryNotInPlaylist,
  60     error_to_compat_str,
  61     ExistingVideoReached,
  62     expand_path,
  63     ExtractorError,
  64     float_or_none,
  65     format_bytes,
  66     format_field,
  67     FORMAT_RE,
  68     formatSeconds,
  69     GeoRestrictedError,
  70     int_or_none,
  71     iri_to_uri,
  72     ISO3166Utils,
  73     locked_file,
  74     make_dir,
  75     make_HTTPS_handler,
  76     MaxDownloadsReached,
  77     network_exceptions,
  78     orderedSet,
  79     OUTTMPL_TYPES,
  80     PagedList,
  81     parse_filesize,
  82     PerRequestProxyHandler,
  83     platform_name,
  84     PostProcessingError,
  85     preferredencoding,
  86     prepend_extension,
  87     process_communicate_or_kill,
  88     random_uuidv4,
  89     register_socks_protocols,
  90     RejectedVideoReached,
  91     render_table,
  92     replace_extension,
  93     SameFileError,
  94     sanitize_filename,
  95     sanitize_path,
  96     sanitize_url,
  97     sanitized_Request,
  98     std_headers,
  99     str_or_none,
 100     strftime_or_none,
 101     subtitles_filename,
 102     to_high_limit_path,
 103     traverse_dict,
 104     UnavailableVideoError,
 105     url_basename,
 106     version_tuple,
 107     write_json_file,
 108     write_string,
 109     YoutubeDLCookieJar,
 110     YoutubeDLCookieProcessor,
 111     YoutubeDLHandler,
 112     YoutubeDLRedirectHandler,
 113 )
 114 from .cache import Cache
 115 from .extractor import (
 116     gen_extractor_classes,
 117     get_info_extractor,
 118     _LAZY_LOADER,
 119     _PLUGIN_CLASSES
 120 )
 121 from .extractor.openload import PhantomJSwrapper
 122 from .downloader import (
 123     get_suitable_downloader,
 124     shorten_protocol_name
 125 )
 126 from .downloader.rtmp import rtmpdump_version
 127 from .postprocessor import (
 128     FFmpegFixupM3u8PP,
 129     FFmpegFixupM4aPP,
 130     FFmpegFixupStretchedPP,
 131     FFmpegMergerPP,
 132     FFmpegPostProcessor,
 133     # FFmpegSubtitlesConvertorPP,
 134     get_postprocessor,
 135     MoveFilesAfterDownloadPP,
 136 )
 137 from .version import __version__
 138
 139 if compat_os_name == 'nt':
 140     import ctypes
 141
 142
 143 class YoutubeDL(object):
 144     """YoutubeDL class.
 145
 146     YoutubeDL objects are the ones responsible for downloading the
 147     actual video file and writing it to disk if the user has requested
 148     it, among some other tasks. In most cases there should be one per
 149     program. As, given a video URL, the downloader doesn't know how to
 150     extract all the needed information, task that InfoExtractors do, it
 151     has to pass the URL to one of them.
 152
 153     For this, YoutubeDL objects have a method that allows
 154     InfoExtractors to be registered in a given order. When it is passed
 155     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 156     finds that reports being able to handle it. The InfoExtractor extracts
 157     all the information about the video or videos the URL refers to, and
 158     YoutubeDL processes the extracted information, possibly using a File
 159     Downloader to download the video.
 160
 161     YoutubeDL objects accept a lot of parameters. In order not to saturate
 162     the object constructor with arguments, it receives a dictionary of
 163     options instead. These options are available through the params
 164     attribute for the InfoExtractors to use. The YoutubeDL also
 165     registers itself as the downloader in charge for the InfoExtractors
 166     that are added to it, so this is a "mutual registration".
 167
 168     Available options:
 169
 170     username:          Username for authentication purposes.
 171     password:          Password for authentication purposes.
 172     videopassword:     Password for accessing a video.
 173     ap_mso:            Adobe Pass multiple-system operator identifier.
 174     ap_username:       Multiple-system operator account username.
 175     ap_password:       Multiple-system operator account password.
 176     usenetrc:          Use netrc for authentication instead.
 177     verbose:           Print additional info to stdout.
 178     quiet:             Do not print messages to stdout.
 179     no_warnings:       Do not print out anything for warnings.
 180     forceprint:        A list of templates to force print
 181     forceurl:          Force printing final URL. (Deprecated)
 182     forcetitle:        Force printing title. (Deprecated)
 183     forceid:           Force printing ID. (Deprecated)
 184     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 185     forcedescription:  Force printing description. (Deprecated)
 186     forcefilename:     Force printing final filename. (Deprecated)
 187     forceduration:     Force printing duration. (Deprecated)
 188     forcejson:         Force printing info_dict as JSON.
 189     dump_single_json:  Force printing the info_dict of the whole playlist
 190                        (or video) as a single JSON line.
 191     force_write_download_archive: Force writing download archive regardless
 192                        of 'skip_download' or 'simulate'.
 193     simulate:          Do not download the video files.
 194     format:            Video format code. see "FORMAT SELECTION" for more details.
 195     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 196     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 197                        extracting metadata even if the video is not actually
 198                        available for download (experimental)
 199     format_sort:       How to sort the video formats. see "Sorting Formats"
 200                        for more details.
 201     format_sort_force: Force the given format_sort. see "Sorting Formats"
 202                        for more details.
 203     allow_multiple_video_streams:   Allow multiple video streams to be merged
 204                        into a single file
 205     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 206                        into a single file
 207     paths:             Dictionary of output paths. The allowed keys are 'home'
 208                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 209     outtmpl:           Dictionary of templates for output names. Allowed keys
 210                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 211                        A string is also accepted for backward compatibility
 212     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 213     restrictfilenames: Do not allow "&" and spaces in file names
 214     trim_file_name:    Limit length of filename (extension excluded)
 215     windowsfilenames:  Force the filenames to be windows compatible
 216     ignoreerrors:      Do not stop on download errors
 217                        (Default True when running yt-dlp,
 218                        but False when directly accessing YoutubeDL class)
 219     skip_playlist_after_errors: Number of allowed failures until the rest of
 220                        the playlist is skipped
 221     force_generic_extractor: Force downloader to use the generic extractor
 222     overwrites:        Overwrite all video and metadata files if True,
 223                        overwrite only non-video files if None
 224                        and don't overwrite any file if False
 225     playliststart:     Playlist item to start at.
 226     playlistend:       Playlist item to end at.
 227     playlist_items:    Specific indices of playlist to download.
 228     playlistreverse:   Download playlist items in reverse order.
 229     playlistrandom:    Download playlist items in random order.
 230     matchtitle:        Download only matching titles.
 231     rejecttitle:       Reject downloads for matching titles.
 232     logger:            Log messages to a logging.Logger instance.
 233     logtostderr:       Log messages to stderr instead of stdout.
 234     writedescription:  Write the video description to a .description file
 235     writeinfojson:     Write the video metadata to a .info.json file
 236     clean_infojson:    Remove private fields from the infojson
 237     writecomments:     Extract video comments. This will not be written to disk
 238                        unless writeinfojson is also given
 239     writeannotations:  Write the video annotations to a .annotations.xml file
 240     writethumbnail:    Write the thumbnail image to a file
 241     allow_playlist_files: Whether to write playlists' description, infojson etc
 242                        also to disk when using the 'write*' options
 243     write_all_thumbnails:  Write all thumbnail formats to files
 244     writelink:         Write an internet shortcut file, depending on the
 245                        current platform (.url/.webloc/.desktop)
 246     writeurllink:      Write a Windows internet shortcut file (.url)
 247     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 248     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 249     writesubtitles:    Write the video subtitles to a file
 250     writeautomaticsub: Write the automatically generated subtitles to a file
 251     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 252                        Downloads all the subtitles of the video
 253                        (requires writesubtitles or writeautomaticsub)
 254     listsubtitles:     Lists all available subtitles for the video
 255     subtitlesformat:   The format code for subtitles
 256     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 257                        The list may contain "all" to refer to all the available
 258                        subtitles. The language can be prefixed with a "-" to
 259                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 260     keepvideo:         Keep the video file after post-processing
 261     daterange:         A DateRange object, download only if the upload_date is in the range.
 262     skip_download:     Skip the actual download of the video file
 263     cachedir:          Location of the cache files in the filesystem.
 264                        False to disable filesystem cache.
 265     noplaylist:        Download single video instead of a playlist if in doubt.
 266     age_limit:         An integer representing the user's age in years.
 267                        Unsuitable videos for the given age are skipped.
 268     min_views:         An integer representing the minimum view count the video
 269                        must have in order to not be skipped.
 270                        Videos without view count information are always
 271                        downloaded. None for no limit.
 272     max_views:         An integer representing the maximum view count.
 273                        Videos that are more popular than that are not
 274                        downloaded.
 275                        Videos without view count information are always
 276                        downloaded. None for no limit.
 277     download_archive:  File name of a file where all downloads are recorded.
 278                        Videos already present in the file are not downloaded
 279                        again.
 280     break_on_existing: Stop the download process after attempting to download a
 281                        file that is in the archive.
 282     break_on_reject:   Stop the download process when encountering a video that
 283                        has been filtered out.
 284     cookiefile:        File name where cookies should be read from and dumped to
 285     nocheckcertificate:Do not verify SSL certificates
 286     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 287                        At the moment, this is only supported by YouTube.
 288     proxy:             URL of the proxy server to use
 289     geo_verification_proxy:  URL of the proxy to use for IP address verification
 290                        on geo-restricted sites.
 291     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 292     bidi_workaround:   Work around buggy terminals without bidirectional text
 293                        support, using bidiv or fribidi
 294     debug_printtraffic:Print out sent and received HTTP traffic
 295     include_ads:       Download ads as well
 296     default_search:    Prepend this string if an input url is not valid.
 297                        'auto' for elaborate guessing
 298     encoding:          Use this encoding instead of the system-specified.
 299     extract_flat:      Do not resolve URLs, return the immediate result.
 300                        Pass in 'in_playlist' to only show this behavior for
 301                        playlist items.
 302     postprocessors:    A list of dictionaries, each with an entry
 303                        * key:  The name of the postprocessor. See
 304                                yt_dlp/postprocessor/__init__.py for a list.
 305                        * when: When to run the postprocessor. Can be one of
 306                                pre_process|before_dl|post_process|after_move.
 307                                Assumed to be 'post_process' if not given
 308     post_hooks:        A list of functions that get called as the final step
 309                        for each video file, after all postprocessors have been
 310                        called. The filename will be passed as the only argument.
 311     progress_hooks:    A list of functions that get called on download
 312                        progress, with a dictionary with the entries
 313                        * status: One of "downloading", "error", or "finished".
 314                                  Check this first and ignore unknown values.
 315
 316                        If status is one of "downloading", or "finished", the
 317                        following properties may also be present:
 318                        * filename: The final filename (always present)
 319                        * tmpfilename: The filename we're currently writing to
 320                        * downloaded_bytes: Bytes on disk
 321                        * total_bytes: Size of the whole file, None if unknown
 322                        * total_bytes_estimate: Guess of the eventual file size,
 323                                                None if unavailable.
 324                        * elapsed: The number of seconds since download started.
 325                        * eta: The estimated time in seconds, None if unknown
 326                        * speed: The download speed in bytes/second, None if
 327                                 unknown
 328                        * fragment_index: The counter of the currently
 329                                          downloaded video fragment.
 330                        * fragment_count: The number of fragments (= individual
 331                                          files that will be merged)
 332
 333                        Progress hooks are guaranteed to be called at least once
 334                        (with status "finished") if the download is successful.
 335     merge_output_format: Extension to use when merging formats.
 336     final_ext:         Expected final extension; used to detect when the file was
 337                        already downloaded and converted. "merge_output_format" is
 338                        replaced by this extension when given
 339     fixup:             Automatically correct known faults of the file.
 340                        One of:
 341                        - "never": do nothing
 342                        - "warn": only emit a warning
 343                        - "detect_or_warn": check whether we can do anything
 344                                            about it, warn otherwise (default)
 345     source_address:    Client-side IP address to bind to.
 346     call_home:         Boolean, true iff we are allowed to contact the
 347                        yt-dlp servers for debugging. (BROKEN)
 348     sleep_interval_requests: Number of seconds to sleep between requests
 349                        during extraction
 350     sleep_interval:    Number of seconds to sleep before each download when
 351                        used alone or a lower bound of a range for randomized
 352                        sleep before each download (minimum possible number
 353                        of seconds to sleep) when used along with
 354                        max_sleep_interval.
 355     max_sleep_interval:Upper bound of a range for randomized sleep before each
 356                        download (maximum possible number of seconds to sleep).
 357                        Must only be used along with sleep_interval.
 358                        Actual sleep time will be a random float from range
 359                        [sleep_interval; max_sleep_interval].
 360     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 361     listformats:       Print an overview of available video formats and exit.
 362     list_thumbnails:   Print a table of all thumbnails and exit.
 363     match_filter:      A function that gets called with the info_dict of
 364                        every video.
 365                        If it returns a message, the video is ignored.
 366                        If it returns None, the video is downloaded.
 367                        match_filter_func in utils.py is one example for this.
 368     no_color:          Do not emit color codes in output.
 369     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 370                        HTTP header
 371     geo_bypass_country:
 372                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 373                        explicit geographic restriction bypassing via faking
 374                        X-Forwarded-For HTTP header
 375     geo_bypass_ip_block:
 376                        IP range in CIDR notation that will be used similarly to
 377                        geo_bypass_country
 378
 379     The following options determine which downloader is picked:
 380     external_downloader: A dictionary of protocol keys and the executable of the
 381                        external downloader to use for it. The allowed protocols
 382                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 383                        Set the value to 'native' to use the native downloader
 384     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 385                        or {'m3u8': 'ffmpeg'} instead.
 386                        Use the native HLS downloader instead of ffmpeg/avconv
 387                        if True, otherwise use ffmpeg/avconv if False, otherwise
 388                        use downloader suggested by extractor if None.
 389     compat_opts:       Compatibility options. See "Differences in default behavior".
 390                        Note that only format-sort, format-spec, no-live-chat, no-attach-info-json
 391                        playlist-index, list-formats, no-youtube-channel-redirect
 392                        and no-youtube-unavailable-videos works when used via the API
 393
 394     The following parameters are not used by YoutubeDL itself, they are used by
 395     the downloader (see yt_dlp/downloader/common.py):
 396     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 397     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 398     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
 399     http_chunk_size.
 400
 401     The following options are used by the post processors:
 402     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 403                        otherwise prefer ffmpeg. (avconv support is deprecated)
 404     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 405                        to the binary or its containing directory.
 406     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 407                         and a list of additional command-line arguments for the
 408                         postprocessor/executable. The dict can also have "PP+EXE" keys
 409                         which are used when the given exe is used by the given PP.
 410                         Use 'default' as the name for arguments to be passed to all PP
 411
 412     The following options are used by the extractors:
 413     extractor_retries: Number of times to retry for known errors
 414     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 415     hls_split_discontinuity: Split HLS playlists to different formats at
 416                        discontinuities such as ad breaks (default: False)
 417     youtube_include_dash_manifest: If True (default), DASH manifests and related
 418                        data will be downloaded and processed by extractor.
 419                        You can reduce network I/O by disabling it if you don't
 420                        care about DASH. (only for youtube)
 421     youtube_include_hls_manifest: If True (default), HLS manifests and related
 422                        data will be downloaded and processed by extractor.
 423                        You can reduce network I/O by disabling it if you don't
 424                        care about HLS. (only for youtube)
 425     """
 426
    # Names of info-dict fields that hold numeric values.
    # (The code that consumes this set is outside this section.)
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level defaults. __init__ rebinds params, _ies, _pps,
    # __prepare_filename_warned, _first_webpage_request, _download_retcode,
    # _num_downloads and _screen_file on every instance.
    params = None
    _ies = []
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    __prepare_filename_warned = False
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    # NOTE(review): __init__ does not rebind the two playlist attributes below,
    # so the set object is shared by all instances unless it is rebound
    # elsewhere in the class - confirm this is intended.
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
 448
    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options.

        params is a dictionary of options (see the class docstring). It is
        merged over the built-in defaults and stored as self.params; a
        missing/None value is treated as an empty dictionary.

        If auto_init is true, the debug header is printed and the default
        info extractors are registered.

        An OSError other than ENOENT raised while starting the bidi helper,
        and an IOError other than ENOENT raised while reading the download
        archive, are propagated to the caller.
        """
        if params is None:
            params = {}
        # Per-instance state
        self._ies = []
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self.__prepare_filename_warned = False
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # The boolean indexes the list: False -> stdout, True -> stderr
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        if sys.version_info < (3, 6):
            self.report_warning(
                'Support for Python version %d.%d have been deprecated and will break in future versions of yt-dlp! '
                'Update to Python 3.6 or above' % sys.version_info[:2])

        def check_deprecated(param, option, suggestion):
            # Warn when a deprecated param was supplied (i.e. is not None);
            # the return value tells the caller whether it was supplied.
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        # The deprecated proxy option only fills in geo_verification_proxy
        # when the new option was not given
        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        # Report any warning messages handed in through the 'warnings' param
        for msg in self.params.get('warnings', []):
            self.report_warning(msg)

        # final_ext takes precedence over merge_output_format
        if self.params.get('final_ext'):
            if self.params.get('merge_output_format'):
                self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
            self.params['merge_output_format'] = self.params['final_ext']

        # An explicit None is dropped so that the key is simply absent
        if 'overwrites' in self.params and self.params['overwrites'] is None:
            del self.params['overwrites']

        if params.get('bidi_workaround', False):
            try:
                import pty
                # The helper process writes to the slave end of the pty;
                # its output is read back from the master end
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                # Try bidiv first and fall back to fribidi if it cannot be started
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                # A missing executable only disables the workaround;
                # any other OSError is propagated
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        # parse_outtmpl() and _setup_opener() are defined elsewhere in the class;
        # both run after the params adjustments above
        self.outtmpl_dict = self.parse_outtmpl()

        self._setup_opener()

        """Preload the archive, if any is specified"""
        def preload_download_archive(fn):
            # Read every line of the archive file fn (whitespace-stripped)
            # into self.archive. Returns True if the file was read, False if
            # fn is None or the file does not exist; other IOErrors propagate.
            if fn is None:
                return False
            self.write_debug('Loading archive file %r\n' % fn)
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        # The set must exist before the helper runs; the helper's return
        # value is not used here
        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Build each configured postprocessor: 'key' selects the class,
        # 'when' (default 'post_process') selects the stage, and the
        # remaining entries are passed as keyword arguments
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            # Work on a copy so that the caller's dictionary is not modified
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            if 'when' in pp_def:
                when = pp_def['when']
                del pp_def['when']
            else:
                when = 'post_process'
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
 583
 584     def warn_if_short_id(self, argv):
 585         # short YouTube ID starting with dash?
 586         idxs = [
 587             i for i, a in enumerate(argv)
 588             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 589         if idxs:
 590             correct_argv = (
 591                 ['yt-dlp']
 592                 + [a for i, a in enumerate(argv) if i not in idxs]
 593                 + ['--'] + [argv[i] for i in idxs]
 594             )
 595             self.report_warning(
 596                 'Long argument string detected. '
 597                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 598                 args_to_str(correct_argv))
 599
 600     def add_info_extractor(self, ie):
 601         """Add an InfoExtractor object to the end of the list."""
 602         self._ies.append(ie)
 603         if not isinstance(ie, type):
 604             self._ies_instances[ie.ie_key()] = ie
 605             ie.set_downloader(self)
 606
 607     def get_info_extractor(self, ie_key):
 608         """
 609         Get an instance of an IE with name ie_key, it will try to get one from
 610         the _ies list, if there's no instance it will create a new one and add
 611         it to the extractor list.
 612         """
 613         ie = self._ies_instances.get(ie_key)
 614         if ie is None:
 615             ie = get_info_extractor(ie_key)()
 616             self.add_info_extractor(ie)
 617         return ie
 618
 619     def add_default_info_extractors(self):
 620         """
 621         Add the InfoExtractors returned by gen_extractors to the end of the list
 622         """
 623         for ie in gen_extractor_classes():
 624             self.add_info_extractor(ie)
 625
 626     def add_post_processor(self, pp, when='post_process'):
 627         """Add a PostProcessor object to the end of the chain."""
 628         self._pps[when].append(pp)
 629         pp.set_downloader(self)
 630
 631     def add_post_hook(self, ph):
 632         """Add the post hook"""
 633         self._post_hooks.append(ph)
 634
 635     def add_progress_hook(self, ph):
 636         """Add the progress hook (currently only for the file downloader)"""
 637         self._progress_hooks.append(ph)
 638
 639     def _bidi_workaround(self, message):
 640         if not hasattr(self, '_output_channel'):
 641             return message
 642
 643         assert hasattr(self, '_output_process')
 644         assert isinstance(message, compat_str)
 645         line_count = message.count('\n') + 1
 646         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 647         self._output_process.stdin.flush()
 648         res = ''.join(self._output_channel.readline().decode('utf-8')
 649                       for _ in range(line_count))
 650         return res[:-len('\n')]
 651
 652     def _write_string(self, s, out=None):
 653         write_string(s, out=out, encoding=self.params.get('encoding'))
 654
 655     def to_stdout(self, message, skip_eol=False, quiet=False):
 656         """Print message to stdout"""
 657         if self.params.get('logger'):
 658             self.params['logger'].debug(message)
 659         elif not quiet:
 660             message = self._bidi_workaround(message)
 661             terminator = ['\n', ''][skip_eol]
 662             output = message + terminator
 663
 664             self._write_string(output, self._screen_file)
 665
 666     def to_stderr(self, message):
 667         """Print message to stderr"""
 668         assert isinstance(message, compat_str)
 669         if self.params.get('logger'):
 670             self.params['logger'].error(message)
 671         else:
 672             message = self._bidi_workaround(message)
 673             output = message + '\n'
 674             self._write_string(output, self._err_file)
 675
 676     def to_console_title(self, message):
 677         if not self.params.get('consoletitle', False):
 678             return
 679         if compat_os_name == 'nt':
 680             if ctypes.windll.kernel32.GetConsoleWindow():
 681                 # c_wchar_p() might not be necessary if `message` is
 682                 # already of type unicode()
 683                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 684         elif 'TERM' in os.environ:
 685             self._write_string('\033]0;%s\007' % message, self._screen_file)
 686
 687     def save_console_title(self):
 688         if not self.params.get('consoletitle', False):
 689             return
 690         if self.params.get('simulate', False):
 691             return
 692         if compat_os_name != 'nt' and 'TERM' in os.environ:
 693             # Save the title on stack
 694             self._write_string('\033[22;0t', self._screen_file)
 695
 696     def restore_console_title(self):
 697         if not self.params.get('consoletitle', False):
 698             return
 699         if self.params.get('simulate', False):
 700             return
 701         if compat_os_name != 'nt' and 'TERM' in os.environ:
 702             # Restore the title from stack
 703             self._write_string('\033[23;0t', self._screen_file)
 704
 705     def __enter__(self):
 706         self.save_console_title()
 707         return self
 708
 709     def __exit__(self, *args):
 710         self.restore_console_title()
 711
 712         if self.params.get('cookiefile') is not None:
 713             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 714
 715     def trouble(self, message=None, tb=None):
 716         """Determine action to take when a download problem appears.
 717
 718         Depending on if the downloader has been configured to ignore
 719         download errors or not, this method may throw an exception or
 720         not when errors are found, after printing the message.
 721
 722         tb, if given, is additional traceback information.
 723         """
 724         if message is not None:
 725             self.to_stderr(message)
 726         if self.params.get('verbose'):
 727             if tb is None:
 728                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 729                     tb = ''
 730                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 731                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 732                     tb += encode_compat_str(traceback.format_exc())
 733                 else:
 734                     tb_data = traceback.format_list(traceback.extract_stack())
 735                     tb = ''.join(tb_data)
 736             self.to_stderr(tb)
 737         if not self.params.get('ignoreerrors', False):
 738             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 739                 exc_info = sys.exc_info()[1].exc_info
 740             else:
 741                 exc_info = sys.exc_info()
 742             raise DownloadError(message, exc_info)
 743         self._download_retcode = 1
 744
 745     def to_screen(self, message, skip_eol=False):
 746         """Print message to stdout if not in quiet mode"""
 747         self.to_stdout(
 748             message, skip_eol, quiet=self.params.get('quiet', False))
 749
 750     def report_warning(self, message):
 751         '''
 752         Print the message to stderr, it will be prefixed with 'WARNING:'
 753         If stderr is a tty file the 'WARNING:' will be colored
 754         '''
 755         if self.params.get('logger') is not None:
 756             self.params['logger'].warning(message)
 757         else:
 758             if self.params.get('no_warnings'):
 759                 return
 760             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 761                 _msg_header = '\033[0;33mWARNING:\033[0m'
 762             else:
 763                 _msg_header = 'WARNING:'
 764             warning_message = '%s %s' % (_msg_header, message)
 765             self.to_stderr(warning_message)
 766
 767     def report_error(self, message, tb=None):
 768         '''
 769         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 770         in red if stderr is a tty file.
 771         '''
 772         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 773             _msg_header = '\033[0;31mERROR:\033[0m'
 774         else:
 775             _msg_header = 'ERROR:'
 776         error_message = '%s %s' % (_msg_header, message)
 777         self.trouble(error_message, tb)
 778
 779     def write_debug(self, message):
 780         '''Log debug message or Print message to stderr'''
 781         if not self.params.get('verbose', False):
 782             return
 783         message = '[debug] %s' % message
 784         if self.params.get('logger'):
 785             self.params['logger'].debug(message)
 786         else:
 787             self._write_string('%s\n' % message)
 788
 789     def report_file_already_downloaded(self, file_name):
 790         """Report file has already been fully downloaded."""
 791         try:
 792             self.to_screen('[download] %s has already been downloaded' % file_name)
 793         except UnicodeEncodeError:
 794             self.to_screen('[download] The file has already been downloaded')
 795
 796     def report_file_delete(self, file_name):
 797         """Report that existing file will be deleted."""
 798         try:
 799             self.to_screen('Deleting existing file %s' % file_name)
 800         except UnicodeEncodeError:
 801             self.to_screen('Deleting existing file')
 802
 803     def parse_outtmpl(self):
 804         outtmpl_dict = self.params.get('outtmpl', {})
 805         if not isinstance(outtmpl_dict, dict):
 806             outtmpl_dict = {'default': outtmpl_dict}
 807         outtmpl_dict.update({
 808             k: v for k, v in DEFAULT_OUTTMPL.items()
 809             if not outtmpl_dict.get(k)})
 810         for key, val in outtmpl_dict.items():
 811             if isinstance(val, bytes):
 812                 self.report_warning(
 813                     'Parameter outtmpl is bytes, but should be a unicode string. '
 814                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 815         return outtmpl_dict
 816
 817     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
 818         """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
 819         template_dict = dict(info_dict)
 820         na = self.params.get('outtmpl_na_placeholder', 'NA')
 821
 822         # duration_string
 823         template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
 824             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
 825             if info_dict.get('duration', None) is not None
 826             else None)
 827
 828         # epoch
 829         template_dict['epoch'] = int(time.time())
 830
 831         # autonumber
 832         autonumber_size = self.params.get('autonumber_size')
 833         if autonumber_size is None:
 834             autonumber_size = 5
 835         template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
 836
 837         # resolution if not defined
 838         if template_dict.get('resolution') is None:
 839             if template_dict.get('width') and template_dict.get('height'):
 840                 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 841             elif template_dict.get('height'):
 842                 template_dict['resolution'] = '%sp' % template_dict['height']
 843             elif template_dict.get('width'):
 844                 template_dict['resolution'] = '%dx?' % template_dict['width']
 845
 846         # For fields playlist_index and autonumber convert all occurrences
 847         # of %(field)s to %(field)0Nd for backward compatibility
 848         field_size_compat_map = {
 849             'playlist_index': len(str(template_dict.get('_last_playlist_index') or '')),
 850             'autonumber': autonumber_size,
 851         }
 852         FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
 853         mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
 854         if mobj:
 855             outtmpl = re.sub(
 856                 FIELD_SIZE_COMPAT_RE,
 857                 r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
 858                 outtmpl)
 859
 860         numeric_fields = list(self._NUMERIC_FIELDS)
 861         if sanitize is None:
 862             sanitize = lambda k, v: v
 863
 864         EXTERNAL_FORMAT_RE = FORMAT_RE.format('(?P<key>[^)]*)')
 865         # Field is of the form key1.key2...
 866         # where keys (except first) can be string, int or slice
 867         FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*'
 868         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
 869             (?P<negate>-)?
 870             (?P<fields>{0})
 871             (?P<maths>(?:[-+]-?(?:\d+(?:\.\d+)?|{0}))*)
 872             (?:>(?P<strf_format>.+?))?
 873             (?:\|(?P<default>.*?))?
 874             $'''.format(FIELD_RE))
 875         MATH_OPERATORS_RE = re.compile(r'(?<![-+])([-+])')
 876         MATH_FUNCTIONS = {
 877             '+': float.__add__,
 878             '-': float.__sub__,
 879         }
 880         for outer_mobj in re.finditer(EXTERNAL_FORMAT_RE, outtmpl):
 881             final_key = outer_mobj.group('key')
 882             str_type = outer_mobj.group('type')
 883             value = None
 884             mobj = re.match(INTERNAL_FORMAT_RE, final_key)
 885             if mobj is not None:
 886                 mobj = mobj.groupdict()
 887                 # Object traversal
 888                 fields = mobj['fields'].split('.')
 889                 value = traverse_dict(template_dict, fields)
 890                 # Negative
 891                 if mobj['negate']:
 892                     value = float_or_none(value)
 893                     if value is not None:
 894                         value *= -1
 895                 # Do maths
 896                 if mobj['maths']:
 897                     value = float_or_none(value)
 898                     operator = None
 899                     for item in MATH_OPERATORS_RE.split(mobj['maths'])[1:]:
 900                         if item == '':
 901                             value = None
 902                         if value is None:
 903                             break
 904                         if operator:
 905                             item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
 906                             offset = float_or_none(item)
 907                             if offset is None:
 908                                 offset = float_or_none(traverse_dict(template_dict, item.split('.')))
 909                             try:
 910                                 value = operator(value, multiplier * offset)
 911                             except (TypeError, ZeroDivisionError):
 912                                 value = None
 913                             operator = None
 914                         else:
 915                             operator = MATH_FUNCTIONS[item]
 916                 # Datetime formatting
 917                 if mobj['strf_format']:
 918                     value = strftime_or_none(value, mobj['strf_format'])
 919                 # Set default
 920                 if value is None and mobj['default'] is not None:
 921                     value = mobj['default']
 922             # Sanitize
 923             if str_type in 'crs' and value is not None:  # string
 924                 value = sanitize('%{}'.format(str_type) % fields[-1], value)
 925             else:  # numeric
 926                 numeric_fields.append(final_key)
 927                 value = float_or_none(value)
 928             if value is not None:
 929                 template_dict[final_key] = value
 930
 931         # Missing numeric fields used together with integer presentation types
 932         # in format specification will break the argument substitution since
 933         # string NA placeholder is returned for missing fields. We will patch
 934         # output template for missing fields to meet string presentation type.
 935         for numeric_field in numeric_fields:
 936             if template_dict.get(numeric_field) is None:
 937                 outtmpl = re.sub(
 938                     FORMAT_RE.format(re.escape(numeric_field)),
 939                     r'%({0})s'.format(numeric_field), outtmpl)
 940
 941         template_dict = collections.defaultdict(lambda: na, (
 942             (k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
 943             for k, v in template_dict.items() if v is not None))
 944         return outtmpl, template_dict
 945
 946     def _prepare_filename(self, info_dict, tmpl_type='default'):
 947         try:
 948             sanitize = lambda k, v: sanitize_filename(
 949                 compat_str(v),
 950                 restricted=self.params.get('restrictfilenames'),
 951                 is_id=(k == 'id' or k.endswith('_id')))
 952             outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
 953             outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
 954
 955             # expand_path translates '%%' into '%' and '$$' into '$'
 956             # correspondingly that is not what we want since we need to keep
 957             # '%%' intact for template dict substitution step. Working around
 958             # with boundary-alike separator hack.
 959             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 960             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 961
 962             # outtmpl should be expand_path'ed before template dict substitution
 963             # because meta fields may contain env variables we don't want to
 964             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 965             # title "Hello $PATH", we don't want `$PATH` to be expanded.
 966             filename = expand_path(outtmpl).replace(sep, '') % template_dict
 967
 968             force_ext = OUTTMPL_TYPES.get(tmpl_type)
 969             if force_ext is not None:
 970                 filename = replace_extension(filename, force_ext, template_dict.get('ext'))
 971
 972             # https://github.com/blackjack4494/youtube-dlc/issues/85
 973             trim_file_name = self.params.get('trim_file_name', False)
 974             if trim_file_name:
 975                 fn_groups = filename.rsplit('.')
 976                 ext = fn_groups[-1]
 977                 sub_ext = ''
 978                 if len(fn_groups) > 2:
 979                     sub_ext = fn_groups[-2]
 980                 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
 981
 982             return filename
 983         except ValueError as err:
 984             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 985             return None
 986
 987     def prepare_filename(self, info_dict, dir_type='', warn=False):
 988         """Generate the output filename."""
 989         paths = self.params.get('paths', {})
 990         assert isinstance(paths, dict)
 991         filename = self._prepare_filename(info_dict, dir_type or 'default')
 992
 993         if warn and not self.__prepare_filename_warned:
 994             if not paths:
 995                 pass
 996             elif filename == '-':
 997                 self.report_warning('--paths is ignored when an outputting to stdout')
 998             elif os.path.isabs(filename):
 999                 self.report_warning('--paths is ignored since an absolute path is given in output template')
1000             self.__prepare_filename_warned = True
1001         if filename == '-' or not filename:
1002             return filename
1003
1004         homepath = expand_path(paths.get('home', '').strip())
1005         assert isinstance(homepath, compat_str)
1006         subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
1007         assert isinstance(subdir, compat_str)
1008         path = os.path.join(homepath, subdir, filename)
1009
1010         # Temporary fix for #4787
1011         # 'Treat' all problem characters by passing filename through preferredencoding
1012         # to workaround encoding issues with subprocess on python2 @ Windows
1013         if sys.version_info < (3, 0) and sys.platform == 'win32':
1014             path = encodeFilename(path, True).decode(preferredencoding())
1015         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1016
1017     def _match_entry(self, info_dict, incomplete):
1018         """ Returns None if the file should be downloaded """
1019
1020         def check_filter():
1021             video_title = info_dict.get('title', info_dict.get('id', 'video'))
1022             if 'title' in info_dict:
1023                 # This can happen when we're just evaluating the playlist
1024                 title = info_dict['title']
1025                 matchtitle = self.params.get('matchtitle', False)
1026                 if matchtitle:
1027                     if not re.search(matchtitle, title, re.IGNORECASE):
1028                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1029                 rejecttitle = self.params.get('rejecttitle', False)
1030                 if rejecttitle:
1031                     if re.search(rejecttitle, title, re.IGNORECASE):
1032                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1033             date = info_dict.get('upload_date')
1034             if date is not None:
1035                 dateRange = self.params.get('daterange', DateRange())
1036                 if date not in dateRange:
1037                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1038             view_count = info_dict.get('view_count')
1039             if view_count is not None:
1040                 min_views = self.params.get('min_views')
1041                 if min_views is not None and view_count < min_views:
1042                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1043                 max_views = self.params.get('max_views')
1044                 if max_views is not None and view_count > max_views:
1045                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1046             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1047                 return 'Skipping "%s" because it is age restricted' % video_title
1048             if self.in_download_archive(info_dict):
1049                 return '%s has already been recorded in archive' % video_title
1050
1051             if not incomplete:
1052                 match_filter = self.params.get('match_filter')
1053                 if match_filter is not None:
1054                     ret = match_filter(info_dict)
1055                     if ret is not None:
1056                         return ret
1057             return None
1058
1059         reason = check_filter()
1060         if reason is not None:
1061             self.to_screen('[download] ' + reason)
1062             if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing', False):
1063                 raise ExistingVideoReached()
1064             elif self.params.get('break_on_reject', False):
1065                 raise RejectedVideoReached()
1066         return reason
1067
1068     @staticmethod
1069     def add_extra_info(info_dict, extra_info):
1070         '''Set the keys from extra_info in info dict if they are missing'''
1071         for key, value in extra_info.items():
1072             info_dict.setdefault(key, value)
1073
1074     def extract_info(self, url, download=True, ie_key=None, extra_info={},
1075                      process=True, force_generic_extractor=False):
1076         """
1077         Return a list with a dictionary for each video extracted.
1078
1079         Arguments:
1080         url -- URL to extract
1081
1082         Keyword arguments:
1083         download -- whether to download videos during extraction
1084         ie_key -- extractor key hint
1085         extra_info -- dictionary containing the extra values to add to each result
1086         process -- whether to resolve all unresolved references (URLs, playlist items),
1087             must be True for download to work.
1088         force_generic_extractor -- force using the generic extractor
1089         """
1090
1091         if not ie_key and force_generic_extractor:
1092             ie_key = 'Generic'
1093
1094         if ie_key:
1095             ies = [self.get_info_extractor(ie_key)]
1096         else:
1097             ies = self._ies
1098
1099         for ie in ies:
1100             if not ie.suitable(url):
1101                 continue
1102
1103             ie_key = ie.ie_key()
1104             ie = self.get_info_extractor(ie_key)
1105             if not ie.working():
1106                 self.report_warning('The program functionality for this site has been marked as broken, '
1107                                     'and will probably not work.')
1108
1109             try:
1110                 temp_id = str_or_none(
1111                     ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
1112                     else ie._match_id(url))
1113             except (AssertionError, IndexError, AttributeError):
1114                 temp_id = None
1115             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1116                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1117                                ie_key, temp_id))
1118                 break
1119             return self.__extract_info(url, ie, download, extra_info, process)
1120         else:
1121             self.report_error('no suitable InfoExtractor for URL %s' % url)
1122
1123     def __handle_extraction_exceptions(func):
1124         def wrapper(self, *args, **kwargs):
1125             try:
1126                 return func(self, *args, **kwargs)
1127             except GeoRestrictedError as e:
1128                 msg = e.msg
1129                 if e.countries:
1130                     msg += '\nThis video is available in %s.' % ', '.join(
1131                         map(ISO3166Utils.short2full, e.countries))
1132                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1133                 self.report_error(msg)
1134             except ExtractorError as e:  # An error we somewhat expected
1135                 self.report_error(compat_str(e), e.format_traceback())
1136             except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
1137                 raise
1138             except Exception as e:
1139                 if self.params.get('ignoreerrors', False):
1140                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1141                 else:
1142                     raise
1143         return wrapper
1144
1145     @__handle_extraction_exceptions
1146     def __extract_info(self, url, ie, download, extra_info, process):
1147         ie_result = ie.extract(url)
1148         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1149             return
1150         if isinstance(ie_result, list):
1151             # Backwards compatibility: old IE result format
1152             ie_result = {
1153                 '_type': 'compat_list',
1154                 'entries': ie_result,
1155             }
1156         self.add_default_extra_info(ie_result, ie, url)
1157         if process:
1158             return self.process_ie_result(ie_result, download, extra_info)
1159         else:
1160             return ie_result
1161
1162     def add_default_extra_info(self, ie_result, ie, url):
1163         self.add_extra_info(ie_result, {
1164             'extractor': ie.IE_NAME,
1165             'webpage_url': url,
1166             'webpage_url_basename': url_basename(url),
1167             'extractor_key': ie.ie_key(),
1168         })
1169
1170     def process_ie_result(self, ie_result, download=True, extra_info={}):
1171         """
1172         Take the result of the ie(may be modified) and resolve all unresolved
1173         references (URLs, playlist items).
1174
1175         It will also download the videos if 'download'.
1176         Returns the resolved ie_result.
1177         """
1178         result_type = ie_result.get('_type', 'video')
1179
1180         if result_type in ('url', 'url_transparent'):
1181             ie_result['url'] = sanitize_url(ie_result['url'])
1182             extract_flat = self.params.get('extract_flat', False)
1183             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1184                     or extract_flat is True):
1185                 self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
1186                 return ie_result
1187
1188         if result_type == 'video':
1189             self.add_extra_info(ie_result, extra_info)
1190             ie_result = self.process_video_result(ie_result, download=download)
1191             additional_urls = ie_result.get('additional_urls')
1192             if additional_urls:
1193                 # TODO: Improve MetadataFromFieldPP to allow setting a list
1194                 if isinstance(additional_urls, compat_str):
1195                     additional_urls = [additional_urls]
1196                 self.to_screen(
1197                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1198                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1199                 ie_result['additional_entries'] = [
1200                     self.extract_info(
1201                         url, download, extra_info,
1202                         force_generic_extractor=self.params.get('force_generic_extractor'))
1203                     for url in additional_urls
1204                 ]
1205             return ie_result
1206         elif result_type == 'url':
1207             # We have to add extra_info to the results because it may be
1208             # contained in a playlist
1209             return self.extract_info(
1210                 ie_result['url'], download,
1211                 ie_key=ie_result.get('ie_key'),
1212                 extra_info=extra_info)
1213         elif result_type == 'url_transparent':
1214             # Use the information from the embedding page
1215             info = self.extract_info(
1216                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1217                 extra_info=extra_info, download=False, process=False)
1218
1219             # extract_info may return None when ignoreerrors is enabled and
1220             # extraction failed with an error, don't crash and return early
1221             # in this case
1222             if not info:
1223                 return info
1224
1225             force_properties = dict(
1226                 (k, v) for k, v in ie_result.items() if v is not None)
1227             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1228                 if f in force_properties:
1229                     del force_properties[f]
1230             new_result = info.copy()
1231             new_result.update(force_properties)
1232
1233             # Extracted info may not be a video result (i.e.
1234             # info.get('_type', 'video') != video) but rather an url or
1235             # url_transparent. In such cases outer metadata (from ie_result)
1236             # should be propagated to inner one (info). For this to happen
1237             # _type of info should be overridden with url_transparent. This
1238             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1239             if new_result.get('_type') == 'url':
1240                 new_result['_type'] = 'url_transparent'
1241
1242             return self.process_ie_result(
1243                 new_result, download=download, extra_info=extra_info)
1244         elif result_type in ('playlist', 'multi_video'):
1245             # Protect from infinite recursion due to recursively nested playlists
1246             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1247             webpage_url = ie_result['webpage_url']
1248             if webpage_url in self._playlist_urls:
1249                 self.to_screen(
1250                     '[download] Skipping already downloaded playlist: %s'
1251                     % ie_result.get('title') or ie_result.get('id'))
1252                 return
1253
1254             self._playlist_level += 1
1255             self._playlist_urls.add(webpage_url)
1256             try:
1257                 return self.__process_playlist(ie_result, download)
1258             finally:
1259                 self._playlist_level -= 1
1260                 if not self._playlist_level:
1261                     self._playlist_urls.clear()
1262         elif result_type == 'compat_list':
1263             self.report_warning(
1264                 'Extractor %s returned a compat_list result. '
1265                 'It needs to be updated.' % ie_result.get('extractor'))
1266
1267             def _fixup(r):
1268                 self.add_extra_info(
1269                     r,
1270                     {
1271                         'extractor': ie_result['extractor'],
1272                         'webpage_url': ie_result['webpage_url'],
1273                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1274                         'extractor_key': ie_result['extractor_key'],
1275                     }
1276                 )
1277                 return r
1278             ie_result['entries'] = [
1279                 self.process_ie_result(_fixup(r), download, extra_info)
1280                 for r in ie_result['entries']
1281             ]
1282             return ie_result
1283         else:
1284             raise Exception('Invalid result type: %s' % result_type)
1285
1286     def _ensure_dir_exists(self, path):
1287         return make_dir(path, self.report_error)
1288
1289     def __process_playlist(self, ie_result, download):
1290         # We process each entry in the playlist
1291         playlist = ie_result.get('title') or ie_result.get('id')
1292         self.to_screen('[download] Downloading playlist: %s' % playlist)
1293
1294         if 'entries' not in ie_result:
1295             raise EntryNotInPlaylist()
1296         incomplete_entries = bool(ie_result.get('requested_entries'))
1297         if incomplete_entries:
1298             def fill_missing_entries(entries, indexes):
1299                 ret = [None] * max(*indexes)
1300                 for i, entry in zip(indexes, entries):
1301                     ret[i - 1] = entry
1302                 return ret
1303             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1304
1305         playlist_results = []
1306
1307         playliststart = self.params.get('playliststart', 1) - 1
1308         playlistend = self.params.get('playlistend')
1309         # For backwards compatibility, interpret -1 as whole list
1310         if playlistend == -1:
1311             playlistend = None
1312
1313         playlistitems_str = self.params.get('playlist_items')
1314         playlistitems = None
1315         if playlistitems_str is not None:
1316             def iter_playlistitems(format):
1317                 for string_segment in format.split(','):
1318                     if '-' in string_segment:
1319                         start, end = string_segment.split('-')
1320                         for item in range(int(start), int(end) + 1):
1321                             yield int(item)
1322                     else:
1323                         yield int(string_segment)
1324             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1325
1326         ie_entries = ie_result['entries']
1327
1328         def make_playlistitems_entries(list_ie_entries):
1329             num_entries = len(list_ie_entries)
1330             for i in playlistitems:
1331                 if -num_entries < i <= num_entries:
1332                     yield list_ie_entries[i - 1]
1333                 elif incomplete_entries:
1334                     raise EntryNotInPlaylist()
1335
1336         if isinstance(ie_entries, list):
1337             n_all_entries = len(ie_entries)
1338             if playlistitems:
1339                 entries = list(make_playlistitems_entries(ie_entries))
1340             else:
1341                 entries = ie_entries[playliststart:playlistend]
1342             n_entries = len(entries)
1343             msg = 'Collected %d videos; downloading %d of them' % (n_all_entries, n_entries)
1344         elif isinstance(ie_entries, PagedList):
1345             if playlistitems:
1346                 entries = []
1347                 for item in playlistitems:
1348                     entries.extend(ie_entries.getslice(
1349                         item - 1, item
1350                     ))
1351             else:
1352                 entries = ie_entries.getslice(
1353                     playliststart, playlistend)
1354             n_entries = len(entries)
1355             msg = 'Downloading %d videos' % n_entries
1356         else:  # iterable
1357             if playlistitems:
1358                 entries = list(make_playlistitems_entries(list(itertools.islice(
1359                     ie_entries, 0, max(playlistitems)))))
1360             else:
1361                 entries = list(itertools.islice(
1362                     ie_entries, playliststart, playlistend))
1363             n_entries = len(entries)
1364             msg = 'Downloading %d videos' % n_entries
1365
1366         if any((entry is None for entry in entries)):
1367             raise EntryNotInPlaylist()
1368         if not playlistitems and (playliststart or playlistend):
1369             playlistitems = list(range(1 + playliststart, 1 + playliststart + len(entries)))
1370         ie_result['entries'] = entries
1371         ie_result['requested_entries'] = playlistitems
1372
1373         if self.params.get('allow_playlist_files', True):
1374             ie_copy = {
1375                 'playlist': playlist,
1376                 'playlist_id': ie_result.get('id'),
1377                 'playlist_title': ie_result.get('title'),
1378                 'playlist_uploader': ie_result.get('uploader'),
1379                 'playlist_uploader_id': ie_result.get('uploader_id'),
1380                 'playlist_index': 0,
1381             }
1382             ie_copy.update(dict(ie_result))
1383
1384             if self.params.get('writeinfojson', False):
1385                 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1386                 if not self._ensure_dir_exists(encodeFilename(infofn)):
1387                     return
1388                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1389                     self.to_screen('[info] Playlist metadata is already present')
1390                 else:
1391                     self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1392                     try:
1393                         write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1394                     except (OSError, IOError):
1395                         self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1396
1397             # TODO: This should be passed to ThumbnailsConvertor if necessary
1398             self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1399
1400             if self.params.get('writedescription', False):
1401                 descfn = self.prepare_filename(ie_copy, 'pl_description')
1402                 if not self._ensure_dir_exists(encodeFilename(descfn)):
1403                     return
1404                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1405                     self.to_screen('[info] Playlist description is already present')
1406                 elif ie_result.get('description') is None:
1407                     self.report_warning('There\'s no playlist description to write.')
1408                 else:
1409                     try:
1410                         self.to_screen('[info] Writing playlist description to: ' + descfn)
1411                         with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1412                             descfile.write(ie_result['description'])
1413                     except (OSError, IOError):
1414                         self.report_error('Cannot write playlist description file ' + descfn)
1415                         return
1416
1417         # Save playlist_index before re-ordering
1418         entries = [
1419             ((playlistitems[i - 1] if playlistitems else i), entry)
1420             for i, entry in enumerate(entries, 1)]
1421
1422         if self.params.get('playlistreverse', False):
1423             entries = entries[::-1]
1424         if self.params.get('playlistrandom', False):
1425             random.shuffle(entries)
1426
1427         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1428
1429         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg))
1430         failures = 0
1431         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1432         for i, entry_tuple in enumerate(entries, 1):
1433             playlist_index, entry = entry_tuple
1434             if 'playlist_index' in self.params.get('compat_options', []):
1435                 playlist_index = playlistitems[i - 1] if playlistitems else i
1436             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1437             # This __x_forwarded_for_ip thing is a bit ugly but requires
1438             # minimal changes
1439             if x_forwarded_for:
1440                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1441             extra = {
1442                 'n_entries': n_entries,
1443                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1444                 'playlist_index': playlist_index,
1445                 'playlist_autonumber': i,
1446                 'playlist': playlist,
1447                 'playlist_id': ie_result.get('id'),
1448                 'playlist_title': ie_result.get('title'),
1449                 'playlist_uploader': ie_result.get('uploader'),
1450                 'playlist_uploader_id': ie_result.get('uploader_id'),
1451                 'extractor': ie_result['extractor'],
1452                 'webpage_url': ie_result['webpage_url'],
1453                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1454                 'extractor_key': ie_result['extractor_key'],
1455             }
1456
1457             if self._match_entry(entry, incomplete=True) is not None:
1458                 continue
1459
1460             entry_result = self.__process_iterable_entry(entry, download, extra)
1461             if not entry_result:
1462                 failures += 1
1463             if failures >= max_failures:
1464                 self.report_error(
1465                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1466                 break
1467             # TODO: skip failed (empty) entries?
1468             playlist_results.append(entry_result)
1469         ie_result['entries'] = playlist_results
1470         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1471         return ie_result
1472
1473     @__handle_extraction_exceptions
1474     def __process_iterable_entry(self, entry, download, extra_info):
1475         return self.process_ie_result(
1476             entry, download=download, extra_info=extra_info)
1477
1478     def _build_format_filter(self, filter_spec):
1479         " Returns a function to filter the formats according to the filter_spec "
1480
1481         OPERATORS = {
1482             '<': operator.lt,
1483             '<=': operator.le,
1484             '>': operator.gt,
1485             '>=': operator.ge,
1486             '=': operator.eq,
1487             '!=': operator.ne,
1488         }
1489         operator_rex = re.compile(r'''(?x)\s*
1490             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1491             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1492             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1493             $
1494             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1495         m = operator_rex.search(filter_spec)
1496         if m:
1497             try:
1498                 comparison_value = int(m.group('value'))
1499             except ValueError:
1500                 comparison_value = parse_filesize(m.group('value'))
1501                 if comparison_value is None:
1502                     comparison_value = parse_filesize(m.group('value') + 'B')
1503                 if comparison_value is None:
1504                     raise ValueError(
1505                         'Invalid value %r in format specification %r' % (
1506                             m.group('value'), filter_spec))
1507             op = OPERATORS[m.group('op')]
1508
1509         if not m:
1510             STR_OPERATORS = {
1511                 '=': operator.eq,
1512                 '^=': lambda attr, value: attr.startswith(value),
1513                 '$=': lambda attr, value: attr.endswith(value),
1514                 '*=': lambda attr, value: value in attr,
1515             }
1516             str_operator_rex = re.compile(r'''(?x)
1517                 \s*(?P<key>[a-zA-Z0-9._-]+)
1518                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1519                 \s*(?P<value>[a-zA-Z0-9._-]+)
1520                 \s*$
1521                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1522             m = str_operator_rex.search(filter_spec)
1523             if m:
1524                 comparison_value = m.group('value')
1525                 str_op = STR_OPERATORS[m.group('op')]
1526                 if m.group('negation'):
1527                     op = lambda attr, value: not str_op(attr, value)
1528                 else:
1529                     op = str_op
1530
1531         if not m:
1532             raise ValueError('Invalid filter specification %r' % filter_spec)
1533
1534         def _filter(f):
1535             actual_value = f.get(m.group('key'))
1536             if actual_value is None:
1537                 return m.group('none_inclusive')
1538             return op(actual_value, comparison_value)
1539         return _filter
1540
1541     def _default_format_spec(self, info_dict, download=True):
1542
1543         def can_merge():
1544             merger = FFmpegMergerPP(self)
1545             return merger.available and merger.can_merge()
1546
1547         prefer_best = (
1548             not self.params.get('simulate', False)
1549             and download
1550             and (
1551                 not can_merge()
1552                 or info_dict.get('is_live', False)
1553                 or self.outtmpl_dict['default'] == '-'))
1554         compat = (
1555             prefer_best
1556             or self.params.get('allow_multiple_audio_streams', False)
1557             or 'format-spec' in self.params.get('compat_opts', []))
1558
1559         return (
1560             'best/bestvideo+bestaudio' if prefer_best
1561             else 'bestvideo*+bestaudio/best' if not compat
1562             else 'bestvideo+bestaudio/best')
1563
1564     def build_format_selector(self, format_spec):
1565         def syntax_error(note, start):
1566             message = (
1567                 'Invalid format specification: '
1568                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1569             return SyntaxError(message)
1570
1571         PICKFIRST = 'PICKFIRST'
1572         MERGE = 'MERGE'
1573         SINGLE = 'SINGLE'
1574         GROUP = 'GROUP'
1575         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1576
1577         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1578                                   'video': self.params.get('allow_multiple_video_streams', False)}
1579
1580         check_formats = self.params.get('check_formats')
1581
1582         def _parse_filter(tokens):
1583             filter_parts = []
1584             for type, string, start, _, _ in tokens:
1585                 if type == tokenize.OP and string == ']':
1586                     return ''.join(filter_parts)
1587                 else:
1588                     filter_parts.append(string)
1589
1590         def _remove_unused_ops(tokens):
1591             # Remove operators that we don't use and join them with the surrounding strings
1592             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1593             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1594             last_string, last_start, last_end, last_line = None, None, None, None
1595             for type, string, start, end, line in tokens:
1596                 if type == tokenize.OP and string == '[':
1597                     if last_string:
1598                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1599                         last_string = None
1600                     yield type, string, start, end, line
1601                     # everything inside brackets will be handled by _parse_filter
1602                     for type, string, start, end, line in tokens:
1603                         yield type, string, start, end, line
1604                         if type == tokenize.OP and string == ']':
1605                             break
1606                 elif type == tokenize.OP and string in ALLOWED_OPS:
1607                     if last_string:
1608                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1609                         last_string = None
1610                     yield type, string, start, end, line
1611                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1612                     if not last_string:
1613                         last_string = string
1614                         last_start = start
1615                         last_end = end
1616                     else:
1617                         last_string += string
1618             if last_string:
1619                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1620
1621         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1622             selectors = []
1623             current_selector = None
1624             for type, string, start, _, _ in tokens:
1625                 # ENCODING is only defined in python 3.x
1626                 if type == getattr(tokenize, 'ENCODING', None):
1627                     continue
1628                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1629                     current_selector = FormatSelector(SINGLE, string, [])
1630                 elif type == tokenize.OP:
1631                     if string == ')':
1632                         if not inside_group:
1633                             # ')' will be handled by the parentheses group
1634                             tokens.restore_last_token()
1635                         break
1636                     elif inside_merge and string in ['/', ',']:
1637                         tokens.restore_last_token()
1638                         break
1639                     elif inside_choice and string == ',':
1640                         tokens.restore_last_token()
1641                         break
1642                     elif string == ',':
1643                         if not current_selector:
1644                             raise syntax_error('"," must follow a format selector', start)
1645                         selectors.append(current_selector)
1646                         current_selector = None
1647                     elif string == '/':
1648                         if not current_selector:
1649                             raise syntax_error('"/" must follow a format selector', start)
1650                         first_choice = current_selector
1651                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1652                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1653                     elif string == '[':
1654                         if not current_selector:
1655                             current_selector = FormatSelector(SINGLE, 'best', [])
1656                         format_filter = _parse_filter(tokens)
1657                         current_selector.filters.append(format_filter)
1658                     elif string == '(':
1659                         if current_selector:
1660                             raise syntax_error('Unexpected "("', start)
1661                         group = _parse_format_selection(tokens, inside_group=True)
1662                         current_selector = FormatSelector(GROUP, group, [])
1663                     elif string == '+':
1664                         if not current_selector:
1665                             raise syntax_error('Unexpected "+"', start)
1666                         selector_1 = current_selector
1667                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1668                         if not selector_2:
1669                             raise syntax_error('Expected a selector', start)
1670                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1671                     else:
1672                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1673                 elif type == tokenize.ENDMARKER:
1674                     break
1675             if current_selector:
1676                 selectors.append(current_selector)
1677             return selectors
1678
1679         def _merge(formats_pair):
1680             format_1, format_2 = formats_pair
1681
1682             formats_info = []
1683             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1684             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1685
1686             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1687                 get_no_more = {"video": False, "audio": False}
1688                 for (i, fmt_info) in enumerate(formats_info):
1689                     for aud_vid in ["audio", "video"]:
1690                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1691                             if get_no_more[aud_vid]:
1692                                 formats_info.pop(i)
1693                             get_no_more[aud_vid] = True
1694
1695             if len(formats_info) == 1:
1696                 return formats_info[0]
1697
1698             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1699             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1700
1701             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1702             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1703
1704             output_ext = self.params.get('merge_output_format')
1705             if not output_ext:
1706                 if the_only_video:
1707                     output_ext = the_only_video['ext']
1708                 elif the_only_audio and not video_fmts:
1709                     output_ext = the_only_audio['ext']
1710                 else:
1711                     output_ext = 'mkv'
1712
1713             new_dict = {
1714                 'requested_formats': formats_info,
1715                 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1716                 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1717                 'ext': output_ext,
1718             }
1719
1720             if the_only_video:
1721                 new_dict.update({
1722                     'width': the_only_video.get('width'),
1723                     'height': the_only_video.get('height'),
1724                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1725                     'fps': the_only_video.get('fps'),
1726                     'vcodec': the_only_video.get('vcodec'),
1727                     'vbr': the_only_video.get('vbr'),
1728                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1729                 })
1730
1731             if the_only_audio:
1732                 new_dict.update({
1733                     'acodec': the_only_audio.get('acodec'),
1734                     'abr': the_only_audio.get('abr'),
1735                 })
1736
1737             return new_dict
1738
1739         def _check_formats(formats):
1740             for f in formats:
1741                 self.to_screen('[info] Testing format %s' % f['format_id'])
1742                 paths = self.params.get('paths', {})
1743                 temp_file = os.path.join(
1744                     expand_path(paths.get('home', '').strip()),
1745                     expand_path(paths.get('temp', '').strip()),
1746                     'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id']))
1747                 try:
1748                     dl, _ = self.dl(temp_file, f, test=True)
1749                 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
1750                     dl = False
1751                 finally:
1752                     if os.path.exists(temp_file):
1753                         os.remove(temp_file)
1754                 if dl:
1755                     yield f
1756                 else:
1757                     self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1758
1759         def _build_selector_function(selector):
1760             if isinstance(selector, list):  # ,
1761                 fs = [_build_selector_function(s) for s in selector]
1762
1763                 def selector_function(ctx):
1764                     for f in fs:
1765                         for format in f(ctx):
1766                             yield format
1767                 return selector_function
1768
1769             elif selector.type == GROUP:  # ()
1770                 selector_function = _build_selector_function(selector.selector)
1771
1772             elif selector.type == PICKFIRST:  # /
1773                 fs = [_build_selector_function(s) for s in selector.selector]
1774
1775                 def selector_function(ctx):
1776                     for f in fs:
1777                         picked_formats = list(f(ctx))
1778                         if picked_formats:
1779                             return picked_formats
1780                     return []
1781
1782             elif selector.type == SINGLE:  # atom
1783                 format_spec = selector.selector or 'best'
1784
1785                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1786                 if format_spec == 'all':
1787                     def selector_function(ctx):
1788                         formats = list(ctx['formats'])
1789                         if check_formats:
1790                             formats = _check_formats(formats)
1791                         for f in formats:
1792                             yield f
1793                 elif format_spec == 'mergeall':
1794                     def selector_function(ctx):
1795                         formats = list(_check_formats(ctx['formats']))
1796                         if not formats:
1797                             return
1798                         merged_format = formats[-1]
1799                         for f in formats[-2::-1]:
1800                             merged_format = _merge((merged_format, f))
1801                         yield merged_format
1802
1803                 else:
1804                     format_fallback, format_reverse, format_idx = False, True, 1
1805                     mobj = re.match(
1806                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1807                         format_spec)
1808                     if mobj is not None:
1809                         format_idx = int_or_none(mobj.group('n'), default=1)
1810                         format_reverse = mobj.group('bw')[0] == 'b'
1811                         format_type = (mobj.group('type') or [None])[0]
1812                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1813                         format_modified = mobj.group('mod') is not None
1814
1815                         format_fallback = not format_type and not format_modified  # for b, w
1816                         filter_f = (
1817                             (lambda f: f.get('%scodec' % format_type) != 'none')
1818                             if format_type and format_modified  # bv*, ba*, wv*, wa*
1819                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1820                             if format_type  # bv, ba, wv, wa
1821                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1822                             if not format_modified  # b, w
1823                             else None)  # b*, w*
1824                     else:
1825                         filter_f = ((lambda f: f.get('ext') == format_spec)
1826                                     if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
1827                                     else (lambda f: f.get('format_id') == format_spec))  # id
1828
1829                     def selector_function(ctx):
1830                         formats = list(ctx['formats'])
1831                         if not formats:
1832                             return
1833                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
1834                         if format_fallback and ctx['incomplete_formats'] and not matches:
1835                             # for extractors with incomplete formats (audio only (soundcloud)
1836                             # or video only (imgur)) best/worst will fallback to
1837                             # best/worst {video,audio}-only format
1838                             matches = formats
1839                         if format_reverse:
1840                             matches = matches[::-1]
1841                         if check_formats:
1842                             matches = list(itertools.islice(_check_formats(matches), format_idx))
1843                         n = len(matches)
1844                         if -n <= format_idx - 1 < n:
1845                             yield matches[format_idx - 1]
1846
1847             elif selector.type == MERGE:        # +
1848                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1849
1850                 def selector_function(ctx):
1851                     for pair in itertools.product(
1852                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1853                         yield _merge(pair)
1854
1855             filters = [self._build_format_filter(f) for f in selector.filters]
1856
1857             def final_selector(ctx):
1858                 ctx_copy = copy.deepcopy(ctx)
1859                 for _filter in filters:
1860                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1861                 return selector_function(ctx_copy)
1862             return final_selector
1863
1864         stream = io.BytesIO(format_spec.encode('utf-8'))
1865         try:
1866             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1867         except tokenize.TokenError:
1868             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1869
1870         class TokenIterator(object):
1871             def __init__(self, tokens):
1872                 self.tokens = tokens
1873                 self.counter = 0
1874
1875             def __iter__(self):
1876                 return self
1877
1878             def __next__(self):
1879                 if self.counter >= len(self.tokens):
1880                     raise StopIteration()
1881                 value = self.tokens[self.counter]
1882                 self.counter += 1
1883                 return value
1884
1885             next = __next__
1886
1887             def restore_last_token(self):
1888                 self.counter -= 1
1889
1890         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1891         return _build_selector_function(parsed_selector)
1892
1893     def _calc_headers(self, info_dict):
1894         res = std_headers.copy()
1895
1896         add_headers = info_dict.get('http_headers')
1897         if add_headers:
1898             res.update(add_headers)
1899
1900         cookies = self._calc_cookies(info_dict)
1901         if cookies:
1902             res['Cookie'] = cookies
1903
1904         if 'X-Forwarded-For' not in res:
1905             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1906             if x_forwarded_for_ip:
1907                 res['X-Forwarded-For'] = x_forwarded_for_ip
1908
1909         return res
1910
1911     def _calc_cookies(self, info_dict):
1912         pr = sanitized_Request(info_dict['url'])
1913         self.cookiejar.add_cookie_header(pr)
1914         return pr.get_header('Cookie')
1915
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single-video result, select its formats and optionally download them.

        info_dict is normalized in place (id, numeric fields, thumbnails,
        dates, chapter/season/episode titles, subtitles, formats), passed
        through self.pre_process(), and the formats matching the 'format'
        param (or the default format spec) are selected. When download is
        true, process_info() is called once per selected format.

        Returns the info dict updated with the last selected format, or None
        when one of the 'list_thumbnails', 'listsubtitles' or 'listformats'
        params caused an early return.

        Raises ExtractorError if 'id' or 'title' is missing, if there are no
        formats, or if the requested format is not available (the latter two
        only warn when the 'ignore_no_formats_error' param is set).
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            # Warn that an extractor returned a field with an unexpected type
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a present, non-string value to compat_str (with a warning)
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every present, non-numeric field listed in _NUMERIC_FIELDS
            # through int_or_none (with a warning)
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            # Build the thumbnails list from the single 'thumbnail' URL, if any
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Ascending sort with missing values ranked lowest; the last entry
            # is the one used as the default 'thumbnail' further below
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '',
                t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    # Fall back to the position in the sorted list as the id
                    t['id'] = '%d' % i

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive the YYYYMMDD date fields from their timestamps when only the
        # timestamp is provided
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and fill in a missing 'ext' from the URL.
        # NOTE(review): an entry with neither 'url' nor 'ext' would raise
        # KeyError on the determine_ext() line - confirm extractors never emit one
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('No video formats found!')
            else:
                self.report_warning('No video formats found!')

        def is_wellformed(f):
            # A format is usable only if it has a non-empty 'url'; a bytes URL
            # is converted to a string in place (with a warning)
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # Maps each sanitized format_id to the list of formats sharing it
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                # Disambiguate duplicates by appending their index: id-0, id-1, ...
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            # (headers are computed from the video-level info overlaid with
            # this format's own fields)
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats and formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        # From here on info_dict is whatever pre_process() returned
        info_dict, _ = self.pre_process(info_dict)

        if self.params.get('listformats'):
            if not info_dict.get('formats'):
                raise ExtractorError('No video formats found', expected=True)
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)

        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True)
            else:
                self.report_warning('Requested format is not available')
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                # Each selected format is processed as its own shallow copy of
                # the video info overlaid with the format's fields
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2170
2171     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2172         """Select the requested subtitles and their format"""
2173         available_subs = {}
2174         if normal_subtitles and self.params.get('writesubtitles'):
2175             available_subs.update(normal_subtitles)
2176         if automatic_captions and self.params.get('writeautomaticsub'):
2177             for lang, cap_info in automatic_captions.items():
2178                 if lang not in available_subs:
2179                     available_subs[lang] = cap_info
2180
2181         if (not self.params.get('writesubtitles') and not
2182                 self.params.get('writeautomaticsub') or not
2183                 available_subs):
2184             return None
2185
2186         all_sub_langs = available_subs.keys()
2187         if self.params.get('allsubtitles', False):
2188             requested_langs = all_sub_langs
2189         elif self.params.get('subtitleslangs', False):
2190             requested_langs = set()
2191             for lang in self.params.get('subtitleslangs'):
2192                 if lang == 'all':
2193                     requested_langs.update(all_sub_langs)
2194                     continue
2195                 discard = lang[0] == '-'
2196                 if discard:
2197                     lang = lang[1:]
2198                 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2199                 if discard:
2200                     for lang in current_langs:
2201                         requested_langs.discard(lang)
2202                 else:
2203                     requested_langs.update(current_langs)
2204         elif 'en' in available_subs:
2205             requested_langs = ['en']
2206         else:
2207             requested_langs = [list(all_sub_langs)[0]]
2208         self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2209
2210         formats_query = self.params.get('subtitlesformat', 'best')
2211         formats_preference = formats_query.split('/') if formats_query else []
2212         subs = {}
2213         for lang in requested_langs:
2214             formats = available_subs.get(lang)
2215             if formats is None:
2216                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2217                 continue
2218             for ext in formats_preference:
2219                 if ext == 'best':
2220                     f = formats[-1]
2221                     break
2222                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2223                 if matches:
2224                     f = matches[-1]
2225                     break
2226             else:
2227                 f = formats[-1]
2228                 self.report_warning(
2229                     'No subtitle format found matching "%s" for language %s, '
2230                     'using %s' % (formats_query, lang, f['ext']))
2231             subs[lang] = f
2232         return subs
2233
2234     def __forced_printings(self, info_dict, filename, incomplete):
2235         def print_mandatory(field, actual_field=None):
2236             if actual_field is None:
2237                 actual_field = field
2238             if (self.params.get('force%s' % field, False)
2239                     and (not incomplete or info_dict.get(actual_field) is not None)):
2240                 self.to_stdout(info_dict[actual_field])
2241
2242         def print_optional(field):
2243             if (self.params.get('force%s' % field, False)
2244                     and info_dict.get(field) is not None):
2245                 self.to_stdout(info_dict[field])
2246
2247         info_dict = info_dict.copy()
2248         if filename is not None:
2249             info_dict['filename'] = filename
2250         if info_dict.get('requested_formats') is not None:
2251             # For RTMP URLs, also include the playpath
2252             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2253         elif 'url' in info_dict:
2254             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2255
2256         for tmpl in self.params.get('forceprint', []):
2257             if re.match(r'\w+$', tmpl):
2258                 tmpl = '%({})s'.format(tmpl)
2259             tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2260             self.to_stdout(tmpl % info_copy)
2261
2262         print_mandatory('title')
2263         print_mandatory('id')
2264         print_mandatory('url', 'urls')
2265         print_optional('thumbnail')
2266         print_optional('description')
2267         print_optional('filename')
2268         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2269             self.to_stdout(formatSeconds(info_dict['duration']))
2270         print_mandatory('format')
2271
2272         if self.params.get('forcejson', False):
2273             self.post_extract(info_dict)
2274             self.to_stdout(json.dumps(info_dict, default=repr))
2275
2276     def dl(self, name, info, subtitle=False, test=False):
2277
2278         if test:
2279             verbose = self.params.get('verbose')
2280             params = {
2281                 'test': True,
2282                 'quiet': not verbose,
2283                 'verbose': verbose,
2284                 'noprogress': not verbose,
2285                 'nopart': True,
2286                 'skip_unavailable_fragments': False,
2287                 'keep_fragments': False,
2288                 'overwrites': True,
2289                 '_no_ytdl_file': True,
2290             }
2291         else:
2292             params = self.params
2293         fd = get_suitable_downloader(info, params)(self, params)
2294         if not test:
2295             for ph in self._progress_hooks:
2296                 fd.add_progress_hook(ph)
2297             self.write_debug('Invoking downloader on %r' % info.get('url'))
2298         new_info = dict(info)
2299         if new_info.get('http_headers') is None:
2300             new_info['http_headers'] = self._calc_headers(new_info)
2301         return fd.download(name, new_info, subtitle)
2302
2303     def process_info(self, info_dict):
2304         """Process a single resolved IE result."""
2305
2306         assert info_dict.get('_type', 'video') == 'video'
2307
2308         info_dict.setdefault('__postprocessors', [])
2309
2310         max_downloads = self.params.get('max_downloads')
2311         if max_downloads is not None:
2312             if self._num_downloads >= int(max_downloads):
2313                 raise MaxDownloadsReached()
2314
2315         # TODO: backward compatibility, to be removed
2316         info_dict['fulltitle'] = info_dict['title']
2317
2318         if 'format' not in info_dict:
2319             info_dict['format'] = info_dict['ext']
2320
2321         if self._match_entry(info_dict, incomplete=False) is not None:
2322             return
2323
2324         self.post_extract(info_dict)
2325         self._num_downloads += 1
2326
2327         # info_dict['_filename'] needs to be set for backward compatibility
2328         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2329         temp_filename = self.prepare_filename(info_dict, 'temp')
2330         files_to_move = {}
2331
2332         # Forced printings
2333         self.__forced_printings(info_dict, full_filename, incomplete=False)
2334
2335         if self.params.get('simulate', False):
2336             if self.params.get('force_write_download_archive', False):
2337                 self.record_download_archive(info_dict)
2338
2339             # Do nothing else if in simulate mode
2340             return
2341
2342         if full_filename is None:
2343             return
2344
2345         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2346             return
2347         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2348             return
2349
2350         if self.params.get('writedescription', False):
2351             descfn = self.prepare_filename(info_dict, 'description')
2352             if not self._ensure_dir_exists(encodeFilename(descfn)):
2353                 return
2354             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2355                 self.to_screen('[info] Video description is already present')
2356             elif info_dict.get('description') is None:
2357                 self.report_warning('There\'s no description to write.')
2358             else:
2359                 try:
2360                     self.to_screen('[info] Writing video description to: ' + descfn)
2361                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2362                         descfile.write(info_dict['description'])
2363                 except (OSError, IOError):
2364                     self.report_error('Cannot write description file ' + descfn)
2365                     return
2366
2367         if self.params.get('writeannotations', False):
2368             annofn = self.prepare_filename(info_dict, 'annotation')
2369             if not self._ensure_dir_exists(encodeFilename(annofn)):
2370                 return
2371             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2372                 self.to_screen('[info] Video annotations are already present')
2373             elif not info_dict.get('annotations'):
2374                 self.report_warning('There are no annotations to write.')
2375             else:
2376                 try:
2377                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2378                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2379                         annofile.write(info_dict['annotations'])
2380                 except (KeyError, TypeError):
2381                     self.report_warning('There are no annotations to write.')
2382                 except (OSError, IOError):
2383                     self.report_error('Cannot write annotations file: ' + annofn)
2384                     return
2385
2386         subtitles_are_requested = any([self.params.get('writesubtitles', False),
2387                                        self.params.get('writeautomaticsub')])
2388
2389         if subtitles_are_requested and info_dict.get('requested_subtitles'):
2390             # subtitles download errors are already managed as troubles in relevant IE
2391             # that way it will silently go on when used with unsupporting IE
2392             subtitles = info_dict['requested_subtitles']
2393             # ie = self.get_info_extractor(info_dict['extractor_key'])
2394             for sub_lang, sub_info in subtitles.items():
2395                 sub_format = sub_info['ext']
2396                 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2397                 sub_filename_final = subtitles_filename(
2398                     self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2399                 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2400                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2401                     sub_info['filepath'] = sub_filename
2402                     files_to_move[sub_filename] = sub_filename_final
2403                 else:
2404                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2405                     if sub_info.get('data') is not None:
2406                         try:
2407                             # Use newline='' to prevent conversion of newline characters
2408                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
2409                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2410                                 subfile.write(sub_info['data'])
2411                             sub_info['filepath'] = sub_filename
2412                             files_to_move[sub_filename] = sub_filename_final
2413                         except (OSError, IOError):
2414                             self.report_error('Cannot write subtitles file ' + sub_filename)
2415                             return
2416                     else:
2417                         try:
2418                             self.dl(sub_filename, sub_info.copy(), subtitle=True)
2419                             sub_info['filepath'] = sub_filename
2420                             files_to_move[sub_filename] = sub_filename_final
2421                         except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2422                             self.report_warning('Unable to download subtitle for "%s": %s' %
2423                                                 (sub_lang, error_to_compat_str(err)))
2424                             continue
2425
2426         if self.params.get('writeinfojson', False):
2427             infofn = self.prepare_filename(info_dict, 'infojson')
2428             if not self._ensure_dir_exists(encodeFilename(infofn)):
2429                 return
2430             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2431                 self.to_screen('[info] Video metadata is already present')
2432             else:
2433                 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2434                 try:
2435                     write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2436                 except (OSError, IOError):
2437                     self.report_error('Cannot write video metadata to JSON file ' + infofn)
2438                     return
2439             info_dict['__infojson_filename'] = infofn
2440
2441         for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2442             thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2443             thumb_filename = replace_extension(
2444                 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2445             files_to_move[thumb_filename_temp] = thumb_filename
2446
2447         # Write internet shortcut files
2448         url_link = webloc_link = desktop_link = False
2449         if self.params.get('writelink', False):
2450             if sys.platform == "darwin":  # macOS.
2451                 webloc_link = True
2452             elif sys.platform.startswith("linux"):
2453                 desktop_link = True
2454             else:  # if sys.platform in ['win32', 'cygwin']:
2455                 url_link = True
2456         if self.params.get('writeurllink', False):
2457             url_link = True
2458         if self.params.get('writewebloclink', False):
2459             webloc_link = True
2460         if self.params.get('writedesktoplink', False):
2461             desktop_link = True
2462
2463         if url_link or webloc_link or desktop_link:
2464             if 'webpage_url' not in info_dict:
2465                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2466                 return
2467             ascii_url = iri_to_uri(info_dict['webpage_url'])
2468
2469         def _write_link_file(extension, template, newline, embed_filename):
2470             linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2471             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2472                 self.to_screen('[info] Internet shortcut is already present')
2473             else:
2474                 try:
2475                     self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2476                     with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2477                         template_vars = {'url': ascii_url}
2478                         if embed_filename:
2479                             template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2480                         linkfile.write(template % template_vars)
2481                 except (OSError, IOError):
2482                     self.report_error('Cannot write internet shortcut ' + linkfn)
2483                     return False
2484             return True
2485
2486         if url_link:
2487             if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2488                 return
2489         if webloc_link:
2490             if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2491                 return
2492         if desktop_link:
2493             if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2494                 return
2495
2496         try:
2497             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2498         except PostProcessingError as err:
2499             self.report_error('Preprocessing: %s' % str(err))
2500             return
2501
2502         must_record_download_archive = False
2503         if self.params.get('skip_download', False):
2504             info_dict['filepath'] = temp_filename
2505             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2506             info_dict['__files_to_move'] = files_to_move
2507             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2508         else:
2509             # Download
2510             try:
2511
2512                 def existing_file(*filepaths):
2513                     ext = info_dict.get('ext')
2514                     final_ext = self.params.get('final_ext', ext)
2515                     existing_files = []
2516                     for file in orderedSet(filepaths):
2517                         if final_ext != ext:
2518                             converted = replace_extension(file, final_ext, ext)
2519                             if os.path.exists(encodeFilename(converted)):
2520                                 existing_files.append(converted)
2521                         if os.path.exists(encodeFilename(file)):
2522                             existing_files.append(file)
2523
2524                     if not existing_files or self.params.get('overwrites', False):
2525                         for file in orderedSet(existing_files):
2526                             self.report_file_delete(file)
2527                             os.remove(encodeFilename(file))
2528                         return None
2529
2530                     self.report_file_already_downloaded(existing_files[0])
2531                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2532                     return existing_files[0]
2533
2534                 success = True
2535                 if info_dict.get('requested_formats') is not None:
2536                     downloaded = []
2537                     merger = FFmpegMergerPP(self)
2538                     if self.params.get('allow_unplayable_formats'):
2539                         self.report_warning(
2540                             'You have requested merging of multiple formats '
2541                             'while also allowing unplayable formats to be downloaded. '
2542                             'The formats won\'t be merged to prevent data corruption.')
2543                     elif not merger.available:
2544                         self.report_warning(
2545                             'You have requested merging of multiple formats but ffmpeg is not installed. '
2546                             'The formats won\'t be merged.')
2547
2548                     def compatible_formats(formats):
2549                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2550                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2551                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2552                         if len(video_formats) > 2 or len(audio_formats) > 2:
2553                             return False
2554
2555                         # Check extension
2556                         exts = set(format.get('ext') for format in formats)
2557                         COMPATIBLE_EXTS = (
2558                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2559                             set(('webm',)),
2560                         )
2561                         for ext_sets in COMPATIBLE_EXTS:
2562                             if ext_sets.issuperset(exts):
2563                                 return True
2564                         # TODO: Check acodec/vcodec
2565                         return False
2566
2567                     requested_formats = info_dict['requested_formats']
2568                     old_ext = info_dict['ext']
2569                     if self.params.get('merge_output_format') is None:
2570                         if not compatible_formats(requested_formats):
2571                             info_dict['ext'] = 'mkv'
2572                             self.report_warning(
2573                                 'Requested formats are incompatible for merge and will be merged into mkv.')
2574                         if (info_dict['ext'] == 'webm'
2575                                 and self.params.get('writethumbnail', False)
2576                                 and info_dict.get('thumbnails')):
2577                             info_dict['ext'] = 'mkv'
2578                             self.report_warning(
2579                                 'webm doesn\'t support embedding a thumbnail, mkv will be used.')
2580
2581                     def correct_ext(filename):
2582                         filename_real_ext = os.path.splitext(filename)[1][1:]
2583                         filename_wo_ext = (
2584                             os.path.splitext(filename)[0]
2585                             if filename_real_ext == old_ext
2586                             else filename)
2587                         return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2588
2589                     # Ensure filename always has a correct extension for successful merge
2590                     full_filename = correct_ext(full_filename)
2591                     temp_filename = correct_ext(temp_filename)
2592                     dl_filename = existing_file(full_filename, temp_filename)
2593                     info_dict['__real_download'] = False
2594                     if dl_filename is None:
2595                         for f in requested_formats:
2596                             new_info = dict(info_dict)
2597                             new_info.update(f)
2598                             fname = prepend_extension(
2599                                 self.prepare_filename(new_info, 'temp'),
2600                                 'f%s' % f['format_id'], new_info['ext'])
2601                             if not self._ensure_dir_exists(fname):
2602                                 return
2603                             downloaded.append(fname)
2604                             partial_success, real_download = self.dl(fname, new_info)
2605                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2606                             success = success and partial_success
2607                         if merger.available and not self.params.get('allow_unplayable_formats'):
2608                             info_dict['__postprocessors'].append(merger)
2609                             info_dict['__files_to_merge'] = downloaded
2610                             # Even if there were no downloads, it is being merged only now
2611                             info_dict['__real_download'] = True
2612                         else:
2613                             for file in downloaded:
2614                                 files_to_move[file] = None
2615                 else:
2616                     # Just a single file
2617                     dl_filename = existing_file(full_filename, temp_filename)
2618                     if dl_filename is None:
2619                         success, real_download = self.dl(temp_filename, info_dict)
2620                         info_dict['__real_download'] = real_download
2621
2622                 dl_filename = dl_filename or temp_filename
2623                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2624
2625             except network_exceptions as err:
2626                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2627                 return
2628             except (OSError, IOError) as err:
2629                 raise UnavailableVideoError(err)
2630             except (ContentTooShortError, ) as err:
2631                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2632                 return
2633
2634             if success and full_filename != '-':
2635                 # Fixup content
2636                 fixup_policy = self.params.get('fixup')
2637                 if fixup_policy is None:
2638                     fixup_policy = 'detect_or_warn'
2639
2640                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2641
2642                 stretched_ratio = info_dict.get('stretched_ratio')
2643                 if stretched_ratio is not None and stretched_ratio != 1:
2644                     if fixup_policy == 'warn':
2645                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2646                             info_dict['id'], stretched_ratio))
2647                     elif fixup_policy == 'detect_or_warn':
2648                         stretched_pp = FFmpegFixupStretchedPP(self)
2649                         if stretched_pp.available:
2650                             info_dict['__postprocessors'].append(stretched_pp)
2651                         else:
2652                             self.report_warning(
2653                                 '%s: Non-uniform pixel ratio (%s). %s'
2654                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2655                     else:
2656                         assert fixup_policy in ('ignore', 'never')
2657
2658                 if (info_dict.get('requested_formats') is None
2659                         and info_dict.get('container') == 'm4a_dash'
2660                         and info_dict.get('ext') == 'm4a'):
2661                     if fixup_policy == 'warn':
2662                         self.report_warning(
2663                             '%s: writing DASH m4a. '
2664                             'Only some players support this container.'
2665                             % info_dict['id'])
2666                     elif fixup_policy == 'detect_or_warn':
2667                         fixup_pp = FFmpegFixupM4aPP(self)
2668                         if fixup_pp.available:
2669                             info_dict['__postprocessors'].append(fixup_pp)
2670                         else:
2671                             self.report_warning(
2672                                 '%s: writing DASH m4a. '
2673                                 'Only some players support this container. %s'
2674                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2675                     else:
2676                         assert fixup_policy in ('ignore', 'never')
2677
2678                 if ('protocol' in info_dict
2679                         and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
2680                     if fixup_policy == 'warn':
2681                         self.report_warning('%s: malformed AAC bitstream detected.' % (
2682                             info_dict['id']))
2683                     elif fixup_policy == 'detect_or_warn':
2684                         fixup_pp = FFmpegFixupM3u8PP(self)
2685                         if fixup_pp.available:
2686                             info_dict['__postprocessors'].append(fixup_pp)
2687                         else:
2688                             self.report_warning(
2689                                 '%s: malformed AAC bitstream detected. %s'
2690                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2691                     else:
2692                         assert fixup_policy in ('ignore', 'never')
2693
2694                 try:
2695                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2696                 except PostProcessingError as err:
2697                     self.report_error('Postprocessing: %s' % str(err))
2698                     return
2699                 try:
2700                     for ph in self._post_hooks:
2701                         ph(info_dict['filepath'])
2702                 except Exception as err:
2703                     self.report_error('post hooks: %s' % str(err))
2704                     return
2705                 must_record_download_archive = True
2706
2707         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2708             self.record_download_archive(info_dict)
2709         max_downloads = self.params.get('max_downloads')
2710         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2711             raise MaxDownloadsReached()
2712
2713     def download(self, url_list):
2714         """Download a given list of URLs."""
2715         outtmpl = self.outtmpl_dict['default']
2716         if (len(url_list) > 1
2717                 and outtmpl != '-'
2718                 and '%' not in outtmpl
2719                 and self.params.get('max_downloads') != 1):
2720             raise SameFileError(outtmpl)
2721
2722         for url in url_list:
2723             try:
2724                 # It also downloads the videos
2725                 res = self.extract_info(
2726                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2727             except UnavailableVideoError:
2728                 self.report_error('unable to download video')
2729             except MaxDownloadsReached:
2730                 self.to_screen('[info] Maximum number of downloaded files reached')
2731                 raise
2732             except ExistingVideoReached:
2733                 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2734                 raise
2735             except RejectedVideoReached:
2736                 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2737                 raise
2738             else:
2739                 if self.params.get('dump_single_json', False):
2740                     self.post_extract(res)
2741                     self.to_stdout(json.dumps(res, default=repr))
2742
2743         return self._download_retcode
2744
2745     def download_with_info_file(self, info_filename):
2746         with contextlib.closing(fileinput.FileInput(
2747                 [info_filename], mode='r',
2748                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2749             # FileInput doesn't have a read method, we can't call json.load
2750             info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2751         try:
2752             self.process_ie_result(info, download=True)
2753         except (DownloadError, EntryNotInPlaylist):
2754             webpage_url = info.get('webpage_url')
2755             if webpage_url is not None:
2756                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2757                 return self.download([webpage_url])
2758             else:
2759                 raise
2760         return self._download_retcode
2761
2762     @staticmethod
2763     def filter_requested_info(info_dict, actually_filter=True):
2764         info_dict.pop('__original_infodict', None)  # Always remove this
2765         if not actually_filter:
2766             info_dict['epoch'] = int(time.time())
2767             return info_dict
2768         exceptions = {
2769             'remove': ['requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries'],
2770             'keep': ['_type'],
2771         }
2772         keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])
2773         filter_fn = lambda obj: (
2774             list(map(filter_fn, obj)) if isinstance(obj, (list, tuple))
2775             else obj if not isinstance(obj, dict)
2776             else dict((k, filter_fn(v)) for k, v in obj.items() if keep_key(k)))
2777         return filter_fn(info_dict)
2778
2779     def run_pp(self, pp, infodict):
2780         files_to_delete = []
2781         if '__files_to_move' not in infodict:
2782             infodict['__files_to_move'] = {}
2783         files_to_delete, infodict = pp.run(infodict)
2784         if not files_to_delete:
2785             return infodict
2786
2787         if self.params.get('keepvideo', False):
2788             for f in files_to_delete:
2789                 infodict['__files_to_move'].setdefault(f, '')
2790         else:
2791             for old_filename in set(files_to_delete):
2792                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2793                 try:
2794                     os.remove(encodeFilename(old_filename))
2795                 except (IOError, OSError):
2796                     self.report_warning('Unable to remove downloaded original file')
2797                 if old_filename in infodict['__files_to_move']:
2798                     del infodict['__files_to_move'][old_filename]
2799         return infodict
2800
2801     @staticmethod
2802     def post_extract(info_dict):
2803         def actual_post_extract(info_dict):
2804             if info_dict.get('_type') in ('playlist', 'multi_video'):
2805                 for video_dict in info_dict.get('entries', {}):
2806                     actual_post_extract(video_dict or {})
2807                 return
2808
2809             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2810             extra = post_extractor().items()
2811             info_dict.update(extra)
2812             info_dict.pop('__post_extractor', None)
2813
2814             original_infodict = info_dict.get('__original_infodict') or {}
2815             original_infodict.update(extra)
2816             original_infodict.pop('__post_extractor', None)
2817
2818         actual_post_extract(info_dict or {})
2819
2820     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2821         info = dict(ie_info)
2822         info['__files_to_move'] = files_to_move or {}
2823         for pp in self._pps[key]:
2824             info = self.run_pp(pp, info)
2825         return info, info.pop('__files_to_move', None)
2826
2827     def post_process(self, filename, ie_info, files_to_move=None):
2828         """Run all the postprocessors on the given file."""
2829         info = dict(ie_info)
2830         info['filepath'] = filename
2831         info['__files_to_move'] = files_to_move or {}
2832
2833         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2834             info = self.run_pp(pp, info)
2835         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2836         del info['__files_to_move']
2837         for pp in self._pps['after_move']:
2838             info = self.run_pp(pp, info)
2839         return info
2840
2841     def _make_archive_id(self, info_dict):
2842         video_id = info_dict.get('id')
2843         if not video_id:
2844             return
2845         # Future-proof against any change in case
2846         # and backwards compatibility with prior versions
2847         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2848         if extractor is None:
2849             url = str_or_none(info_dict.get('url'))
2850             if not url:
2851                 return
2852             # Try to find matching extractor for the URL and take its ie_key
2853             for ie in self._ies:
2854                 if ie.suitable(url):
2855                     extractor = ie.ie_key()
2856                     break
2857             else:
2858                 return
2859         return '%s %s' % (extractor.lower(), video_id)
2860
2861     def in_download_archive(self, info_dict):
2862         fn = self.params.get('download_archive')
2863         if fn is None:
2864             return False
2865
2866         vid_id = self._make_archive_id(info_dict)
2867         if not vid_id:
2868             return False  # Incomplete video information
2869
2870         return vid_id in self.archive
2871
2872     def record_download_archive(self, info_dict):
2873         fn = self.params.get('download_archive')
2874         if fn is None:
2875             return
2876         vid_id = self._make_archive_id(info_dict)
2877         assert vid_id
2878         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2879             archive_file.write(vid_id + '\n')
2880         self.archive.add(vid_id)
2881
2882     @staticmethod
2883     def format_resolution(format, default='unknown'):
2884         if format.get('vcodec') == 'none':
2885             return 'audio only'
2886         if format.get('resolution') is not None:
2887             return format['resolution']
2888         if format.get('width') and format.get('height'):
2889             res = '%dx%d' % (format['width'], format['height'])
2890         elif format.get('height'):
2891             res = '%sp' % format['height']
2892         elif format.get('width'):
2893             res = '%dx?' % format['width']
2894         else:
2895             res = default
2896         return res
2897
2898     def _format_note(self, fdict):
2899         res = ''
2900         if fdict.get('ext') in ['f4f', 'f4m']:
2901             res += '(unsupported) '
2902         if fdict.get('language'):
2903             if res:
2904                 res += ' '
2905             res += '[%s] ' % fdict['language']
2906         if fdict.get('format_note') is not None:
2907             res += fdict['format_note'] + ' '
2908         if fdict.get('tbr') is not None:
2909             res += '%4dk ' % fdict['tbr']
2910         if fdict.get('container') is not None:
2911             if res:
2912                 res += ', '
2913             res += '%s container' % fdict['container']
2914         if (fdict.get('vcodec') is not None
2915                 and fdict.get('vcodec') != 'none'):
2916             if res:
2917                 res += ', '
2918             res += fdict['vcodec']
2919             if fdict.get('vbr') is not None:
2920                 res += '@'
2921         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2922             res += 'video@'
2923         if fdict.get('vbr') is not None:
2924             res += '%4dk' % fdict['vbr']
2925         if fdict.get('fps') is not None:
2926             if res:
2927                 res += ', '
2928             res += '%sfps' % fdict['fps']
2929         if fdict.get('acodec') is not None:
2930             if res:
2931                 res += ', '
2932             if fdict['acodec'] == 'none':
2933                 res += 'video only'
2934             else:
2935                 res += '%-5s' % fdict['acodec']
2936         elif fdict.get('abr') is not None:
2937             if res:
2938                 res += ', '
2939             res += 'audio'
2940         if fdict.get('abr') is not None:
2941             res += '@%3dk' % fdict['abr']
2942         if fdict.get('asr') is not None:
2943             res += ' (%5dHz)' % fdict['asr']
2944         if fdict.get('filesize') is not None:
2945             if res:
2946                 res += ', '
2947             res += format_bytes(fdict['filesize'])
2948         elif fdict.get('filesize_approx') is not None:
2949             if res:
2950                 res += ', '
2951             res += '~' + format_bytes(fdict['filesize_approx'])
2952         return res
2953
2954     def _format_note_table(self, f):
2955         def join_fields(*vargs):
2956             return ', '.join((val for val in vargs if val != ''))
2957
2958         return join_fields(
2959             'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2960             format_field(f, 'language', '[%s]'),
2961             format_field(f, 'format_note'),
2962             format_field(f, 'container', ignore=(None, f.get('ext'))),
2963             format_field(f, 'asr', '%5dHz'))
2964
2965     def list_formats(self, info_dict):
2966         formats = info_dict.get('formats', [info_dict])
2967         new_format = (
2968             'list-formats' not in self.params.get('compat_opts', [])
2969             and self.params.get('list_formats_as_table', True) is not False)
2970         if new_format:
2971             table = [
2972                 [
2973                     format_field(f, 'format_id'),
2974                     format_field(f, 'ext'),
2975                     self.format_resolution(f),
2976                     format_field(f, 'fps', '%d'),
2977                     '|',
2978                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2979                     format_field(f, 'tbr', '%4dk'),
2980                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
2981                     '|',
2982                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
2983                     format_field(f, 'vbr', '%4dk'),
2984                     format_field(f, 'acodec', default='unknown').replace('none', ''),
2985                     format_field(f, 'abr', '%3dk'),
2986                     format_field(f, 'asr', '%5dHz'),
2987                     self._format_note_table(f)]
2988                 for f in formats
2989                 if f.get('preference') is None or f['preference'] >= -1000]
2990             header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', '  TBR', 'PROTO',
2991                            '|', 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2992         else:
2993             table = [
2994                 [
2995                     format_field(f, 'format_id'),
2996                     format_field(f, 'ext'),
2997                     self.format_resolution(f),
2998                     self._format_note(f)]
2999                 for f in formats
3000                 if f.get('preference') is None or f['preference'] >= -1000]
3001             header_line = ['format code', 'extension', 'resolution', 'note']
3002
3003         self.to_screen(
3004             '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
3005                 header_line,
3006                 table,
3007                 delim=new_format,
3008                 extraGap=(0 if new_format else 1),
3009                 hideEmpty=new_format)))
3010
3011     def list_thumbnails(self, info_dict):
3012         thumbnails = info_dict.get('thumbnails')
3013         if not thumbnails:
3014             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3015             return
3016
3017         self.to_screen(
3018             '[info] Thumbnails for %s:' % info_dict['id'])
3019         self.to_screen(render_table(
3020             ['ID', 'width', 'height', 'URL'],
3021             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3022
3023     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3024         if not subtitles:
3025             self.to_screen('%s has no %s' % (video_id, name))
3026             return
3027         self.to_screen(
3028             'Available %s for %s:' % (name, video_id))
3029
3030         def _row(lang, formats):
3031             exts, names = zip(*((f['ext'], f.get('name', 'unknown')) for f in reversed(formats)))
3032             if len(set(names)) == 1:
3033                 names = [] if names[0] == 'unknown' else names[:1]
3034             return [lang, ', '.join(names), ', '.join(exts)]
3035
3036         self.to_screen(render_table(
3037             ['Language', 'Name', 'Formats'],
3038             [_row(lang, formats) for lang, formats in subtitles.items()],
3039             hideEmpty=True))
3040
3041     def urlopen(self, req):
3042         """ Start an HTTP download """
3043         if isinstance(req, compat_basestring):
3044             req = sanitized_Request(req)
3045         return self._opener.open(req, timeout=self._socket_timeout)
3046
3047     def print_debug_header(self):
3048         if not self.params.get('verbose'):
3049             return
3050
3051         if type('') is not compat_str:
3052             # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
3053             self.report_warning(
3054                 'Your Python is broken! Update to a newer and supported version')
3055
3056         stdout_encoding = getattr(
3057             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
3058         encoding_str = (
3059             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3060                 locale.getpreferredencoding(),
3061                 sys.getfilesystemencoding(),
3062                 stdout_encoding,
3063                 self.get_encoding()))
3064         write_string(encoding_str, encoding=None)
3065
3066         source = (
3067             '(exe)' if hasattr(sys, 'frozen')
3068             else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
3069             else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
3070             else '')
3071         self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
3072         if _LAZY_LOADER:
3073             self._write_string('[debug] Lazy loading extractors enabled\n')
3074         if _PLUGIN_CLASSES:
3075             self._write_string(
3076                 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
3077         if self.params.get('compat_opts'):
3078             self._write_string(
3079                 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
3080         try:
3081             sp = subprocess.Popen(
3082                 ['git', 'rev-parse', '--short', 'HEAD'],
3083                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3084                 cwd=os.path.dirname(os.path.abspath(__file__)))
3085             out, err = process_communicate_or_kill(sp)
3086             out = out.decode().strip()
3087             if re.match('[0-9a-f]+', out):
3088                 self._write_string('[debug] Git HEAD: %s\n' % out)
3089         except Exception:
3090             try:
3091                 sys.exc_clear()
3092             except Exception:
3093                 pass
3094
3095         def python_implementation():
3096             impl_name = platform.python_implementation()
3097             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3098                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3099             return impl_name
3100
3101         self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3102             platform.python_version(),
3103             python_implementation(),
3104             platform.architecture()[0],
3105             platform_name()))
3106
3107         exe_versions = FFmpegPostProcessor.get_versions(self)
3108         exe_versions['rtmpdump'] = rtmpdump_version()
3109         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3110         exe_str = ', '.join(
3111             '%s %s' % (exe, v)
3112             for exe, v in sorted(exe_versions.items())
3113             if v
3114         )
3115         if not exe_str:
3116             exe_str = 'none'
3117         self._write_string('[debug] exe versions: %s\n' % exe_str)
3118
3119         proxy_map = {}
3120         for handler in self._opener.handlers:
3121             if hasattr(handler, 'proxies'):
3122                 proxy_map.update(handler.proxies)
3123         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
3124
3125         if self.params.get('call_home', False):
3126             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3127             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
3128             return
3129             latest_version = self.urlopen(
3130                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3131             if version_tuple(latest_version) > version_tuple(__version__):
3132                 self.report_warning(
3133                     'You are using an outdated version (newest version: %s)! '
3134                     'See https://yt-dl.org/update if you need help updating.' %
3135                     latest_version)
3136
3137     def _setup_opener(self):
3138         timeout_val = self.params.get('socket_timeout')
3139         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
3140
3141         opts_cookiefile = self.params.get('cookiefile')
3142         opts_proxy = self.params.get('proxy')
3143
3144         if opts_cookiefile is None:
3145             self.cookiejar = compat_cookiejar.CookieJar()
3146         else:
3147             opts_cookiefile = expand_path(opts_cookiefile)
3148             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
3149             if os.access(opts_cookiefile, os.R_OK):
3150                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
3151
3152         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3153         if opts_proxy is not None:
3154             if opts_proxy == '':
3155                 proxies = {}
3156             else:
3157                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3158         else:
3159             proxies = compat_urllib_request.getproxies()
3160             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3161             if 'http' in proxies and 'https' not in proxies:
3162                 proxies['https'] = proxies['http']
3163         proxy_handler = PerRequestProxyHandler(proxies)
3164
3165         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3166         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3167         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3168         redirect_handler = YoutubeDLRedirectHandler()
3169         data_handler = compat_urllib_request_DataHandler()
3170
3171         # When passing our own FileHandler instance, build_opener won't add the
3172         # default FileHandler and allows us to disable the file protocol, which
3173         # can be used for malicious purposes (see
3174         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3175         file_handler = compat_urllib_request.FileHandler()
3176
3177         def file_open(*args, **kwargs):
3178             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3179         file_handler.file_open = file_open
3180
3181         opener = compat_urllib_request.build_opener(
3182             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3183
3184         # Delete the default user-agent header, which would otherwise apply in
3185         # cases where our custom HTTP handler doesn't come into play
3186         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3187         opener.addheaders = []
3188         self._opener = opener
3189
3190     def encode(self, s):
3191         if isinstance(s, bytes):
3192             return s  # Already encoded
3193
3194         try:
3195             return s.encode(self.get_encoding())
3196         except UnicodeEncodeError as err:
3197             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3198             raise
3199
3200     def get_encoding(self):
3201         encoding = self.params.get('encoding')
3202         if encoding is None:
3203             encoding = preferredencoding()
3204         return encoding
3205
3206     def _write_thumbnails(self, info_dict, filename):  # return the extensions
3207         write_all = self.params.get('write_all_thumbnails', False)
3208         thumbnails = []
3209         if write_all or self.params.get('writethumbnail', False):
3210             thumbnails = info_dict.get('thumbnails') or []
3211         multiple = write_all and len(thumbnails) > 1
3212
3213         ret = []
3214         for t in thumbnails[::1 if write_all else -1]:
3215             thumb_ext = determine_ext(t['url'], 'jpg')
3216             suffix = '%s.' % t['id'] if multiple else ''
3217             thumb_display_id = '%s ' % t['id'] if multiple else ''
3218             t['filepath'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
3219
3220             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
3221                 ret.append(suffix + thumb_ext)
3222                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3223                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3224             else:
3225                 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
3226                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
3227                 try:
3228                     uf = self.urlopen(t['url'])
3229                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3230                         shutil.copyfileobj(uf, thumbf)
3231                     ret.append(suffix + thumb_ext)
3232                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3233                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
3234                 except network_exceptions as err:
3235                     self.report_warning('Unable to download thumbnail "%s": %s' %
3236                                         (t['url'], error_to_compat_str(err)))
3237             if ret and not write_all:
3238                 break
3239         return ret