import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import http.cookiejar
import io
import itertools
import json
import locale
import operator
import os
import random
import re
import shutil
import string
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata

from .cache import Cache
from .compat import functools, urllib  # isort: split
from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector
from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
from .networking.exceptions import (
    HTTPError,
    NoSupportingHandlers,
    RequestError,
    SSLError,
    network_exceptions,
)
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import (
    REPOSITORY,
    _get_system_deprecation,
    _make_label,
    current_git_head,
    detect_variant,
)
from .utils import (
    DEFAULT_OUTTMPL,
    IDENTITY,
    LINK_TEMPLATES,
    MEDIA_EXTENSIONS,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    FormatSorter,
    GeoRestrictedError,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    UserNotLive,
    age_restricted,
    args_to_str,
    bug_reports_message,
    date_from_str,
    deprecation_warning,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    escapeHTML,
    expand_path,
    extract_basic_auth,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_compatible_ext,
    get_domain,
    int_or_none,
    iri_to_uri,
    is_path_like,
    join_nonempty,
    locked_file,
    make_archive_id,
    make_dir,
    number_of_digits,
    orderedSet,
    orderedSet_from_options,
    parse_filesize,
    preferredencoding,
    prepend_extension,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    system_identifier,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_call,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .utils._utils import _YDLLogger
from .utils.networking import (
    HTTPHeaderDict,
    clean_headers,
    clean_proxies,
    std_headers,
)
from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader does not know how
    to extract all the needed information (a task that InfoExtractors
    do), it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL object also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

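    A minimal usage sketch (mirroring the documented embedding API; the
    option values and URL are illustrative):

        from yt_dlp import YoutubeDL

        ydl_opts = {'format': 'bestaudio/best'}
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
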
    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
    netrc_cmd:         Use a shell command to get credentials.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation.
                       (A sketch of such a function appears after this options list.)
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of the same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Print everything to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove internal metadata from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc.
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A utils.DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  A set, or the name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
                       * False: Always process. Default for API
                       * True: Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                         from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                         playlists (not multi_video). Default for CLI
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                         the entries of utils.POSTPROCESS_WHEN
                         Assumed to be 'post_process' if not given
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                         Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                         None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                         unknown
                       * fragment_index: The counter of the currently
                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
                       (A sketch of a conforming hook appears after this options list.)
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                         Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       - Raise utils.DownloadCancelled(msg) to abort remaining
                         downloads when a video is rejected.
                       match_filter_func in utils/_utils.py is one example for this.
                       (A sketch of a conforming filter appears after this options list.)
    color:             A Dictionary with output stream names as keys
                       and their respective color policy as values.
                       Can also just be a single color policy,
                       in which case it applies to all outputs.
                       Valid stream names are 'stdout' and 'stderr'.
                       Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress:        Do not print the progress bar
    live_from_start:   Whether to download livestream videos from the start

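    For the callable options above (format, progress_hooks, match_filter),
    minimal sketches of conforming callbacks. These are illustrative only;
    the function names are made up, and the exact contracts are documented
    in the respective entries:

        def pick_worst(ctx):
            # ctx['formats'] is sorted worst to best; yield the one(s) to download
            yield ctx['formats'][0]

        def my_hook(d):
            # Check "status" first and ignore unknown values
            if d['status'] == 'finished':
                print('Done downloading', d['filename'])

        def longer_than_a_minute(info, *, incomplete):
            # Returning a message skips the video; returning None downloads it
            duration = info.get('duration')
            if duration and duration < 60:
                return 'The video is too short'

        ydl_opts = {
            'format': pick_worst,
            'progress_hooks': [my_hook],
            'match_filter': longer_than_a_minute,
        }
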
    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors (default: 3)
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists into different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
                       - `raise DownloadCancelled(msg)` in match_filter instead
    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       If True, use the native HLS downloader instead of
                       ffmpeg/avconv; if False, use ffmpeg/avconv; if None,
                       use the downloader suggested by the extractor.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by the extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by the extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    no_color:          Same as `color='no_color'`
    no_overwrites:     Same as `overwrites=False`
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data',
        'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _deprecated_multivalue_fields = {
        'album_artist': 'album_artists',
        'artist': 'artists',
        'composer': 'composers',
        'creator': 'creators',
        'genre': 'genres',
    }
    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)
        self.__header_cookies = []

        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )

        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.params.setdefault('_warnings', []).append(
                    'Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
        no_color = bool(os.getenv('NO_COLOR'))

        def process_color_policy(stream):
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
            if policy in ('auto', None):
                if term_allow_color and supports_terminal_sequences(stream):
                    return 'no_color' if no_color else True
                return False
            assert policy in ('always', 'never', 'no_color'), policy
            return {'always': True, 'never': False}.get(policy, policy)
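        # Illustrative outcome of the above policy resolution (assumed values):
        #   color={'stdout': 'never', 'stderr': 'auto'} -> stdout is never colored;
        #   stderr is colored only on a capable terminal with NO_COLOR unset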

        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'
        })

        system_deprecation = _get_system_deprecation()
        if system_deprecation:
            self.deprecated_feature(system_deprecation.replace('\n', '\n '))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
        self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
        self.params['http_headers'].pop('Cookie', None)

        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key; it will try to get one from
        the _ies list, and if there is no instance it will create a new one
        and add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
        try:
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
                }, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def save_cookies(self):
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()

    def __exit__(self, *args):
        self.restore_console_title()
        self.close()

    def close(self):
        self.save_cookies()
        if '_request_director' in self.__dict__:
            self._request_director.close()
            del self._request_director

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may raise an exception or not
        when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        BAD_FORMAT='light red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))
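    # e.g. (illustrative): with params {'paths': {'home': '~/Videos', 'subtitle': 'subs'}},
    # get_output_path('subtitle', 'x.vtt') joins home, the type-specific dir and the
    # filename into '<home>/subs/x.vtt', with '~' expanded and the path sanitized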

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join(random.choices(string.ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)
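    # e.g. (a sketch, assuming the escaping regex behaves as described above):
    #   escape_outtmpl('100% %(title)s') -> '100%% %(title)s'
    #   (the bare '%' is doubled; the valid '%(title)s' template is left intact)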

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
            '*': float.__mul__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int, slice or "{field, ...}"
        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
            'inner': FIELD_INNER_RE,
            'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
        }
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')
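        # Illustrative keys accepted by the grammar above (taken from the
        # documented output-template syntax; actual values depend on the info_dict):
        #   %(subtitles.en.-1.ext)s                     -> object traversal
        #   %(playlist_index+10)03d                     -> arithmetic
        #   %(epoch-3600>%H-%M-%S)s                     -> maths + strftime formatting
        #   %(release_date>%Y,upload_date>%Y|Unknown)s  -> alternates + default
        #   %(chapters&has chapters|no chapters)s       -> replacement + default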

        def _from_user_input(field):
            if field == ':':
                return ...
            elif ':' in field:
                return slice(*map(int_or_none, field.split(':')))
            elif int_or_none(field) is not None:
                return int(field)
            return field

        def _traverse_infodict(fields):
            fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                      for f in ([x] if x.startswith('{') else x.split('.'))]
            for i in (0, -1):
                if fields and not fields[i]:
                    fields.pop(i)

            for i, f in enumerate(fields):
                if not f.startswith('{'):
                    fields[i] = _from_user_input(f)
                    continue
                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
                fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}

            return traverse_obj(info_dict, fields, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
            if sanitize and value == '':
                value = None
            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        class _ReplacementFormatter(string.Formatter):
            def get_field(self, field_name, args, kwargs):
                if field_name.isdigit():
                    return args[0], -1
                raise ValueError('Unsupported field')

        replacement_formatter = _ReplacementFormatter()

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            value, replacement, default, last_field = None, None, na, ''
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                last_field, replacement = mobj['fields'], mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            if None not in (value, replacement):
                try:
                    value = replacement_formatter.format(replacement, value)
                except ValueError:
                    value, default = None, na

            fmt = outer_mobj.group('format')
            if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
                fmt = f'0{field_size_compat_map[last_field]:d}d'

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if value is None:
                value, fmt = default, 's'
            elif fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(
                    value, default=_dumpjson_default,
                    indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
            elif fmt[-1] == 'h':  # html
                value, fmt = escapeHTML(str(value)), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rsa':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                if fmt[-1] == 'r':
                    value, fmt = repr(value), str_fmt
                elif fmt[-1] == 'a':
                    value, fmt = ascii(value), str_fmt
                if fmt[-1] in 'csra':
                    value = sanitizer(last_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1386
1387 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1388 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1389 return self.escape_outtmpl(outtmpl) % info_dict
1390
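# Illustrative sketch (not part of the class API surface): with a constructed
# instance and a minimal, made-up info dict, template evaluation boils down to:
#
#   ydl = YoutubeDL({})
#   ydl.evaluate_outtmpl('%(title).20s [%(id)s].%(ext)s',
#                        {'title': 'An Example Video', 'id': 'abc123', 'ext': 'mp4'})
#   # -> 'An Example Video [abc123].mp4'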
1391 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1392 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1393 if outtmpl is None:
1394 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1395 try:
1396 outtmpl = self._outtmpl_expandpath(outtmpl)
1397 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1398 if not filename:
1399 return None
1400
1401 if tmpl_type in ('', 'temp'):
1402 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1403 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1404 filename = replace_extension(filename, ext, final_ext)
1405 elif tmpl_type:
1406 force_ext = OUTTMPL_TYPES[tmpl_type]
1407 if force_ext:
1408 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1409
1410 # https://github.com/blackjack4494/youtube-dlc/issues/85
1411 trim_file_name = self.params.get('trim_file_name', False)
1412 if trim_file_name:
1413 no_ext, *ext = filename.rsplit('.', 2)
1414 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1415
1416 return filename
1417 except ValueError as err:
1418 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1419 return None
1420
1421 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1422 """Generate the output filename"""
1423 if outtmpl:
1424 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1425 dir_type = None
1426 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1427 if not filename and dir_type not in ('', 'temp'):
1428 return ''
1429
1430 if warn:
1431 if not self.params.get('paths'):
1432 pass
1433 elif filename == '-':
1434 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
1435 elif os.path.isabs(filename):
1436 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1437 if filename == '-' or not filename:
1438 return filename
1439
1440 return self.get_output_path(dir_type, filename)
1441
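# Sketch of how the entry points above relate (all values hypothetical):
#
#   info = {'id': 'abc123', 'title': 'Demo', 'ext': 'mp4'}
#   ydl.prepare_filename(info)                    # default template + --paths handling
#   ydl.prepare_filename(info, 'description')     # 'description' template; OUTTMPL_TYPES may force its extension
#   ydl.prepare_filename(info, outtmpl='%(id)s.%(ext)s')  # ad-hoc template (mutually exclusive with dir_type)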
1442 def _match_entry(self, info_dict, incomplete=False, silent=False):
1443 """Returns None if the file should be downloaded"""
1444 _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
1445 assert incomplete or _type == 'video', 'Only video result can be considered complete'
1446
1447 video_title = info_dict.get('title', info_dict.get('id', 'entry'))
1448
1449 def check_filter():
1450 if _type in ('playlist', 'multi_video'):
1451 return
1452 elif _type in ('url', 'url_transparent') and not try_call(
1453 lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1454 return
1455
1456 if 'title' in info_dict:
1457 # This can happen when we're just evaluating the playlist
1458 title = info_dict['title']
1459 matchtitle = self.params.get('matchtitle', False)
1460 if matchtitle:
1461 if not re.search(matchtitle, title, re.IGNORECASE):
1462 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1463 rejecttitle = self.params.get('rejecttitle', False)
1464 if rejecttitle:
1465 if re.search(rejecttitle, title, re.IGNORECASE):
1466 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1467
1468 date = info_dict.get('upload_date')
1469 if date is not None:
1470 dateRange = self.params.get('daterange', DateRange())
1471 if date not in dateRange:
1472 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1473 view_count = info_dict.get('view_count')
1474 if view_count is not None:
1475 min_views = self.params.get('min_views')
1476 if min_views is not None and view_count < min_views:
1477 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1478 max_views = self.params.get('max_views')
1479 if max_views is not None and view_count > max_views:
1480 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1481 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1482 return 'Skipping "%s" because it is age restricted' % video_title
1483
1484 match_filter = self.params.get('match_filter')
1485 if match_filter is None:
1486 return None
1487
1488 cancelled = None
1489 try:
1490 try:
1491 ret = match_filter(info_dict, incomplete=incomplete)
1492 except TypeError:
1493 # For backward compatibility
1494 ret = None if incomplete else match_filter(info_dict)
1495 except DownloadCancelled as err:
1496 if err.msg is not NO_DEFAULT:
1497 raise
1498 ret, cancelled = err.msg, err
1499
1500 if ret is NO_DEFAULT:
1501 while True:
1502 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1503 reply = input(self._format_screen(
1504 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1505 if reply in {'y', ''}:
1506 return None
1507 elif reply == 'n':
1508 if cancelled:
1509 raise type(cancelled)(f'Skipping {video_title}')
1510 return f'Skipping {video_title}'
1511 return ret
1512
1513 if self.in_download_archive(info_dict):
1514 reason = ''.join((
1515 format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
1516 format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
1517 'has already been recorded in the archive'))
1518 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1519 else:
1520 try:
1521 reason = check_filter()
1522 except DownloadCancelled as e:
1523 reason, break_opt, break_err = e.msg, 'match_filter', type(e)
1524 else:
1525 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1526 if reason is not None:
1527 if not silent:
1528 self.to_screen('[download] ' + reason)
1529 if self.params.get(break_opt, False):
1530 raise break_err()
1531 return reason
1532
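# Sketch: the `match_filter` consulted by check_filter() above may be any
# callable returning None to accept, a string (used as the skip reason) to
# reject, or NO_DEFAULT to trigger the interactive prompt. A hypothetical filter:
#
#   def skip_long(info_dict, *, incomplete=False):
#       if not incomplete and (info_dict.get('duration') or 0) > 3600:
#           return 'Longer than an hour'
#       return None
#
#   ydl = YoutubeDL({'match_filter': skip_long})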
1533 @staticmethod
1534 def add_extra_info(info_dict, extra_info):
1535 '''Set the keys from extra_info in info_dict if they are missing'''
1536 for key, value in extra_info.items():
1537 info_dict.setdefault(key, value)
1538
1539 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1540 process=True, force_generic_extractor=False):
1541 """
1542 Extract and return the information dictionary of the URL
1543
1544 Arguments:
1545 @param url URL to extract
1546
1547 Keyword arguments:
1548 @param download Whether to download videos
1549 @param process Whether to resolve all unresolved references (URLs, playlist items).
1550 Must be True for download to work
1551 @param ie_key Use only the extractor with this key
1552
1553 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1554 @param force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
1555 """
1556
1557 if extra_info is None:
1558 extra_info = {}
1559
1560 if not ie_key and force_generic_extractor:
1561 ie_key = 'Generic'
1562
1563 if ie_key:
1564 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1565 else:
1566 ies = self._ies
1567
1568 for key, ie in ies.items():
1569 if not ie.suitable(url):
1570 continue
1571
1572 if not ie.working():
1573 self.report_warning('Support for this site has been marked as broken, '
1574 'and will probably not work.')
1575
1576 temp_id = ie.get_temp_id(url)
1577 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1578 self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
1579 'has already been recorded in the archive')
1580 if self.params.get('break_on_existing', False):
1581 raise ExistingVideoReached()
1582 break
1583 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1584 else:
1585 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1586 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1587 tb=False if extractors_restricted else None)
1588
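# Typical call, as a hedged sketch (URL is hypothetical):
#
#   with YoutubeDL({}) as ydl:
#       info = ydl.extract_info('https://example.com/video/123', download=False)
#   # With process=True (the default), `info` has all references resolved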
1589 def _handle_extraction_exceptions(func):
1590 @functools.wraps(func)
1591 def wrapper(self, *args, **kwargs):
1592 while True:
1593 try:
1594 return func(self, *args, **kwargs)
1595 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1596 raise
1597 except ReExtractInfo as e:
1598 if e.expected:
1599 self.to_screen(f'{e}; Re-extracting data')
1600 else:
1601 self.to_stderr('\r')
1602 self.report_warning(f'{e}; Re-extracting data')
1603 continue
1604 except GeoRestrictedError as e:
1605 msg = e.msg
1606 if e.countries:
1607 msg += '\nThis video is available in %s.' % ', '.join(
1608 map(ISO3166Utils.short2full, e.countries))
1609 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1610 self.report_error(msg)
1611 except ExtractorError as e: # An error we somewhat expected
1612 self.report_error(str(e), e.format_traceback())
1613 except Exception as e:
1614 if self.params.get('ignoreerrors'):
1615 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1616 else:
1617 raise
1618 break
1619 return wrapper
1620
1621 def _wait_for_video(self, ie_result={}):
1622 if (not self.params.get('wait_for_video')
1623 or ie_result.get('_type', 'video') != 'video'
1624 or ie_result.get('formats') or ie_result.get('url')):
1625 return
1626
1627 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1628 last_msg = ''
1629
1630 def progress(msg):
1631 nonlocal last_msg
1632 full_msg = f'{msg}\n'
1633 if not self.params.get('noprogress'):
1634 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1635 elif last_msg:
1636 return
1637 self.to_screen(full_msg, skip_eol=True)
1638 last_msg = msg
1639
1640 min_wait, max_wait = self.params.get('wait_for_video')
1641 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1642 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1643 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1644 self.report_warning('Release time of video is not known')
1645 elif ie_result and (diff or 0) <= 0:
1646 self.report_warning('Video should already be available according to extracted info')
1647 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1648 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1649
1650 wait_till = time.time() + diff
1651 try:
1652 while True:
1653 diff = wait_till - time.time()
1654 if diff <= 0:
1655 progress('')
1656 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1657 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1658 time.sleep(1)
1659 except KeyboardInterrupt:
1660 progress('')
1661 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1662 except BaseException as e:
1663 if not isinstance(e, ReExtractInfo):
1664 self.to_screen('')
1665 raise
1666
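# Sketch: `wait_for_video` is a (min_wait, max_wait) tuple in seconds, e.g.
#   YoutubeDL({'wait_for_video': (60, 3600)})
# waits `release_timestamp - now`, clamped to that range, when the release
# time is known, and a random duration within the range when it is not.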
1667 def _load_cookies(self, data, *, autoscope=True):
1668 """Loads cookies from a `Cookie` header
1669
1670 This tries to work around the security vulnerability of passing cookies to every domain.
1671 See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1672
1673 @param data The Cookie header as string to load the cookies from
1674 @param autoscope If `False`, scope cookies using Set-Cookie syntax and raise an error for cookies without a domain
1675 If `True`, save cookies for later to be stored in the jar with a limited scope
1676 If a URL, save cookies in the jar with the domain of the URL
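For illustration (hypothetical values): `_load_cookies('a=b; Domain=.example.com',
autoscope=False)` stores the cookie scoped to .example.com, while
`_load_cookies('a=b', autoscope=False)` is rejected as unscoped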
1677 """
1678 for cookie in LenientSimpleCookie(data).values():
1679 if autoscope and any(cookie.values()):
1680 raise ValueError('Invalid syntax in Cookie Header')
1681
1682 domain = cookie.get('domain') or ''
1683 expiry = cookie.get('expires')
1684 if expiry == '': # 0 is valid
1685 expiry = None
1686 prepared_cookie = http.cookiejar.Cookie(
1687 cookie.get('version') or 0, cookie.key, cookie.value, None, False,
1688 domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
1689 cookie.get('secure') or False, expiry, False, None, None, {})
1690
1691 if domain:
1692 self.cookiejar.set_cookie(prepared_cookie)
1693 elif autoscope is True:
1694 self.deprecated_feature(
1695 'Passing cookies as a header is a potential security risk; '
1696 'they will be scoped to the domain of the downloaded urls. '
1697 'Please consider loading cookies from a file or browser instead.')
1698 self.__header_cookies.append(prepared_cookie)
1699 elif autoscope:
1700 self.report_warning(
1701 'The extractor result contains an unscoped cookie as an HTTP header. '
1702 f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
1703 only_once=True)
1704 self._apply_header_cookies(autoscope, [prepared_cookie])
1705 else:
1706 self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1707 tb=False, is_error=False)
1708
1709 def _apply_header_cookies(self, url, cookies=None):
1710 """Applies stray header cookies to the provided url
1711
1712 This loads header cookies and scopes them to the domain provided in `url`.
1713 While this is not ideal, it helps reduce the risk of them being sent
1714 to an unintended destination while mostly maintaining compatibility.
1715 """
1716 parsed = urllib.parse.urlparse(url)
1717 if not parsed.hostname:
1718 return
1719
1720 for cookie in map(copy.copy, cookies or self.__header_cookies):
1721 cookie.domain = f'.{parsed.hostname}'
1722 self.cookiejar.set_cookie(cookie)
1723
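# Sketch (hypothetical values): given a stray header cookie `a=b` loaded above,
#   self._apply_header_cookies('https://example.com/watch')
# copies it into the jar scoped to the domain '.example.com'.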
1724 @_handle_extraction_exceptions
1725 def __extract_info(self, url, ie, download, extra_info, process):
1726 self._apply_header_cookies(url)
1727
1728 try:
1729 ie_result = ie.extract(url)
1730 except UserNotLive as e:
1731 if process:
1732 if self.params.get('wait_for_video'):
1733 self.report_warning(e)
1734 self._wait_for_video()
1735 raise
1736 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1737 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1738 return
1739 if isinstance(ie_result, list):
1740 # Backwards compatibility: old IE result format
1741 ie_result = {
1742 '_type': 'compat_list',
1743 'entries': ie_result,
1744 }
1745 if extra_info.get('original_url'):
1746 ie_result.setdefault('original_url', extra_info['original_url'])
1747 self.add_default_extra_info(ie_result, ie, url)
1748 if process:
1749 self._wait_for_video(ie_result)
1750 return self.process_ie_result(ie_result, download, extra_info)
1751 else:
1752 return ie_result
1753
1754 def add_default_extra_info(self, ie_result, ie, url):
1755 if url is not None:
1756 self.add_extra_info(ie_result, {
1757 'webpage_url': url,
1758 'original_url': url,
1759 })
1760 webpage_url = ie_result.get('webpage_url')
1761 if webpage_url:
1762 self.add_extra_info(ie_result, {
1763 'webpage_url_basename': url_basename(webpage_url),
1764 'webpage_url_domain': get_domain(webpage_url),
1765 })
1766 if ie is not None:
1767 self.add_extra_info(ie_result, {
1768 'extractor': ie.IE_NAME,
1769 'extractor_key': ie.ie_key(),
1770 })
1771
1772 def process_ie_result(self, ie_result, download=True, extra_info=None):
1773 """
1774 Take the result of the ie (may be modified) and resolve all unresolved
1775 references (URLs, playlist items).
1776
1777 It will also download the videos if 'download' is True.
1778 Returns the resolved ie_result.
1779 """
1780 if extra_info is None:
1781 extra_info = {}
1782 result_type = ie_result.get('_type', 'video')
1783
1784 if result_type in ('url', 'url_transparent'):
1785 ie_result['url'] = sanitize_url(
1786 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1787 if ie_result.get('original_url') and not extra_info.get('original_url'):
1788 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1789
1790 extract_flat = self.params.get('extract_flat', False)
1791 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1792 or extract_flat is True):
1793 info_copy = ie_result.copy()
1794 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1795 if ie and not ie_result.get('id'):
1796 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1797 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1798 self.add_extra_info(info_copy, extra_info)
1799 info_copy, _ = self.pre_process(info_copy)
1800 self._fill_common_fields(info_copy, False)
1801 self.__forced_printings(info_copy)
1802 self._raise_pending_errors(info_copy)
1803 if self.params.get('force_write_download_archive', False):
1804 self.record_download_archive(info_copy)
1805 return ie_result
1806
1807 if result_type == 'video':
1808 self.add_extra_info(ie_result, extra_info)
1809 ie_result = self.process_video_result(ie_result, download=download)
1810 self._raise_pending_errors(ie_result)
1811 additional_urls = (ie_result or {}).get('additional_urls')
1812 if additional_urls:
1813 # TODO: Improve MetadataParserPP to allow setting a list
1814 if isinstance(additional_urls, str):
1815 additional_urls = [additional_urls]
1816 self.to_screen(
1817 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1818 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1819 ie_result['additional_entries'] = [
1820 self.extract_info(
1821 url, download, extra_info=extra_info,
1822 force_generic_extractor=self.params.get('force_generic_extractor'))
1823 for url in additional_urls
1824 ]
1825 return ie_result
1826 elif result_type == 'url':
1827 # We have to add extra_info to the results because it may be
1828 # contained in a playlist
1829 return self.extract_info(
1830 ie_result['url'], download,
1831 ie_key=ie_result.get('ie_key'),
1832 extra_info=extra_info)
1833 elif result_type == 'url_transparent':
1834 # Use the information from the embedding page
1835 info = self.extract_info(
1836 ie_result['url'], ie_key=ie_result.get('ie_key'),
1837 extra_info=extra_info, download=False, process=False)
1838
1839 # extract_info may return None when ignoreerrors is enabled and
1840 # extraction failed with an error, don't crash and return early
1841 # in this case
1842 if not info:
1843 return info
1844
1845 exempted_fields = {'_type', 'url', 'ie_key'}
1846 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1847 # For video clips, the id etc of the clip extractor should be used
1848 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1849
1850 new_result = info.copy()
1851 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1852
1853 # Extracted info may not be a video result (i.e.
1854 # info.get('_type', 'video') != 'video') but rather a url or
1855 # url_transparent. In such cases, the outer metadata (from ie_result)
1856 # should be propagated to the inner one (info). For this to happen,
1857 # the _type of info should be overridden with url_transparent. This
1858 # fixes the issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1859 if new_result.get('_type') == 'url':
1860 new_result['_type'] = 'url_transparent'
1861
1862 return self.process_ie_result(
1863 new_result, download=download, extra_info=extra_info)
1864 elif result_type in ('playlist', 'multi_video'):
1865 # Protect from infinite recursion due to recursively nested playlists
1866 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1867 webpage_url = ie_result.get('webpage_url') # Playlists may not have webpage_url
1868 if webpage_url and webpage_url in self._playlist_urls:
1869 self.to_screen(
1870 '[download] Skipping already downloaded playlist: %s'
1871 % (ie_result.get('title') or ie_result.get('id')))
1872 return
1873
1874 self._playlist_level += 1
1875 self._playlist_urls.add(webpage_url)
1876 self._fill_common_fields(ie_result, False)
1877 self._sanitize_thumbnails(ie_result)
1878 try:
1879 return self.__process_playlist(ie_result, download)
1880 finally:
1881 self._playlist_level -= 1
1882 if not self._playlist_level:
1883 self._playlist_urls.clear()
1884 elif result_type == 'compat_list':
1885 self.report_warning(
1886 'Extractor %s returned a compat_list result. '
1887 'It needs to be updated.' % ie_result.get('extractor'))
1888
1889 def _fixup(r):
1890 self.add_extra_info(r, {
1891 'extractor': ie_result['extractor'],
1892 'webpage_url': ie_result['webpage_url'],
1893 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1894 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1895 'extractor_key': ie_result['extractor_key'],
1896 })
1897 return r
1898 ie_result['entries'] = [
1899 self.process_ie_result(_fixup(r), download, extra_info)
1900 for r in ie_result['entries']
1901 ]
1902 return ie_result
1903 else:
1904 raise Exception('Invalid result type: %s' % result_type)
1905
1906 def _ensure_dir_exists(self, path):
1907 return make_dir(path, self.report_error)
1908
1909 @staticmethod
1910 def _playlist_infodict(ie_result, strict=False, **kwargs):
1911 info = {
1912 'playlist_count': ie_result.get('playlist_count'),
1913 'playlist': ie_result.get('title') or ie_result.get('id'),
1914 'playlist_id': ie_result.get('id'),
1915 'playlist_title': ie_result.get('title'),
1916 'playlist_uploader': ie_result.get('uploader'),
1917 'playlist_uploader_id': ie_result.get('uploader_id'),
1918 **kwargs,
1919 }
1920 if strict:
1921 return info
1922 if ie_result.get('webpage_url'):
1923 info.update({
1924 'webpage_url': ie_result['webpage_url'],
1925 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1926 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1927 })
1928 return {
1929 **info,
1930 'playlist_index': 0,
1931 '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1932 'extractor': ie_result['extractor'],
1933 'extractor_key': ie_result['extractor_key'],
1934 }
1935
1936 def __process_playlist(self, ie_result, download):
1937 """Process each entry in the playlist"""
1938 assert ie_result['_type'] in ('playlist', 'multi_video')
1939
1940 common_info = self._playlist_infodict(ie_result, strict=True)
1941 title = common_info.get('playlist') or '<Untitled>'
1942 if self._match_entry(common_info, incomplete=True) is not None:
1943 return
1944 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
1945
1946 all_entries = PlaylistEntries(self, ie_result)
1947 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1948
1949 lazy = self.params.get('lazy_playlist')
1950 if lazy:
1951 resolved_entries, n_entries = [], 'N/A'
1952 ie_result['requested_entries'], ie_result['entries'] = None, None
1953 else:
1954 entries = resolved_entries = list(entries)
1955 n_entries = len(resolved_entries)
1956 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1957 if not ie_result.get('playlist_count'):
1958 # Better to do this after potentially exhausting entries
1959 ie_result['playlist_count'] = all_entries.get_full_count()
1960
1961 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1962 ie_copy = collections.ChainMap(ie_result, extra)
1963
1964 _infojson_written = False
1965 write_playlist_files = self.params.get('allow_playlist_files', True)
1966 if write_playlist_files and self.params.get('list_thumbnails'):
1967 self.list_thumbnails(ie_result)
1968 if write_playlist_files and not self.params.get('simulate'):
1969 _infojson_written = self._write_info_json(
1970 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1971 if _infojson_written is None:
1972 return
1973 if self._write_description('playlist', ie_result,
1974 self.prepare_filename(ie_copy, 'pl_description')) is None:
1975 return
1976 # TODO: This should be passed to ThumbnailsConvertor if necessary
1977 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1978
1979 if lazy:
1980 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1981 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1982 elif self.params.get('playlistreverse'):
1983 entries.reverse()
1984 elif self.params.get('playlistrandom'):
1985 random.shuffle(entries)
1986
1987 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
1988 f'{format_field(ie_result, "playlist_count", " of %s")}')
1989
1990 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1991 if self.params.get('extract_flat') == 'discard_in_playlist':
1992 keep_resolved_entries = ie_result['_type'] != 'playlist'
1993 if keep_resolved_entries:
1994 self.write_debug('The information of all playlist entries will be held in memory')
1995
1996 failures = 0
1997 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1998 for i, (playlist_index, entry) in enumerate(entries):
1999 if lazy:
2000 resolved_entries.append((playlist_index, entry))
2001 if not entry:
2002 continue
2003
2004 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
2005 if not lazy and 'playlist-index' in self.params['compat_opts']:
2006 playlist_index = ie_result['requested_entries'][i]
2007
2008 entry_copy = collections.ChainMap(entry, {
2009 **common_info,
2010 'n_entries': int_or_none(n_entries),
2011 'playlist_index': playlist_index,
2012 'playlist_autonumber': i + 1,
2013 })
2014
2015 if self._match_entry(entry_copy, incomplete=True) is not None:
2016 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
2017 resolved_entries[i] = (playlist_index, NO_DEFAULT)
2018 continue
2019
2020 self.to_screen('[download] Downloading item %s of %s' % (
2021 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
2022
2023 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
2024 'playlist_index': playlist_index,
2025 'playlist_autonumber': i + 1,
2026 }, extra))
2027 if not entry_result:
2028 failures += 1
2029 if failures >= max_failures:
2030 self.report_error(
2031 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
2032 break
2033 if keep_resolved_entries:
2034 resolved_entries[i] = (playlist_index, entry_result)
2035
2036 # Update with processed data
2037 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
2038 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
2039 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
2040 # Do not set for full playlist
2041 ie_result.pop('requested_entries')
2042
2043 # Write the updated info to json
2044 if _infojson_written is True and self._write_info_json(
2045 'updated playlist', ie_result,
2046 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
2047 return
2048
2049 ie_result = self.run_all_pps('playlist', ie_result)
2050 self.to_screen(f'[download] Finished downloading playlist: {title}')
2051 return ie_result
2052
2053 @_handle_extraction_exceptions
2054 def __process_iterable_entry(self, entry, download, extra_info):
2055 return self.process_ie_result(
2056 entry, download=download, extra_info=extra_info)
2057
2058 def _build_format_filter(self, filter_spec):
2059 " Returns a function to filter the formats according to the filter_spec "
2060
2061 OPERATORS = {
2062 '<': operator.lt,
2063 '<=': operator.le,
2064 '>': operator.gt,
2065 '>=': operator.ge,
2066 '=': operator.eq,
2067 '!=': operator.ne,
2068 }
2069 operator_rex = re.compile(r'''(?x)\s*
2070 (?P<key>[\w.-]+)\s*
2071 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
2072 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2073 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
2074 m = operator_rex.fullmatch(filter_spec)
2075 if m:
2076 try:
2077 comparison_value = int(m.group('value'))
2078 except ValueError:
2079 comparison_value = parse_filesize(m.group('value'))
2080 if comparison_value is None:
2081 comparison_value = parse_filesize(m.group('value') + 'B')
2082 if comparison_value is None:
2083 raise ValueError(
2084 'Invalid value %r in format specification %r' % (
2085 m.group('value'), filter_spec))
2086 op = OPERATORS[m.group('op')]
2087
2088 if not m:
2089 STR_OPERATORS = {
2090 '=': operator.eq,
2091 '^=': lambda attr, value: attr.startswith(value),
2092 '$=': lambda attr, value: attr.endswith(value),
2093 '*=': lambda attr, value: value in attr,
2094 '~=': lambda attr, value: value.search(attr) is not None
2095 }
2096 str_operator_rex = re.compile(r'''(?x)\s*
2097 (?P<key>[a-zA-Z0-9._-]+)\s*
2098 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
2099 (?P<quote>["'])?
2100 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2101 (?(quote)(?P=quote))\s*
2102 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
2103 m = str_operator_rex.fullmatch(filter_spec)
2104 if m:
2105 if m.group('op') == '~=':
2106 comparison_value = re.compile(m.group('value'))
2107 else:
2108 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2109 str_op = STR_OPERATORS[m.group('op')]
2110 if m.group('negation'):
2111 op = lambda attr, value: not str_op(attr, value)
2112 else:
2113 op = str_op
2114
2115 if not m:
2116 raise SyntaxError('Invalid filter specification %r' % filter_spec)
2117
2118 def _filter(f):
2119 actual_value = f.get(m.group('key'))
2120 if actual_value is None:
2121 return m.group('none_inclusive')
2122 return op(actual_value, comparison_value)
2123 return _filter
2124
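# Sketch of filter specs the builder above accepts (values hypothetical):
#
#   self._build_format_filter('filesize<100M')     # numeric comparison with size suffix
#   self._build_format_filter('height>=720?')      # '?' also keeps formats lacking the field
#   self._build_format_filter('vcodec^=avc1')      # string prefix match
#   self._build_format_filter('format_id!~=dash')  # negated regex match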
2125 def _check_formats(self, formats):
2126 for f in formats:
2127 self.to_screen('[info] Testing format %s' % f['format_id'])
2128 path = self.get_output_path('temp')
2129 if not self._ensure_dir_exists(f'{path}/'):
2130 continue
2131 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2132 temp_file.close()
2133 try:
2134 success, _ = self.dl(temp_file.name, f, test=True)
2135 except (DownloadError, OSError, ValueError) + network_exceptions:
2136 success = False
2137 finally:
2138 if os.path.exists(temp_file.name):
2139 try:
2140 os.remove(temp_file.name)
2141 except OSError:
2142 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2143 if success:
2144 yield f
2145 else:
2146 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2147
2148 def _default_format_spec(self, info_dict, download=True):
2149
2150 def can_merge():
2151 merger = FFmpegMergerPP(self)
2152 return merger.available and merger.can_merge()
2153
2154 prefer_best = (
2155 not self.params.get('simulate')
2156 and download
2157 and (
2158 not can_merge()
2159 or info_dict.get('is_live') and not self.params.get('live_from_start')
2160 or self.params['outtmpl']['default'] == '-'))
2161 compat = (
2162 prefer_best
2163 or self.params.get('allow_multiple_audio_streams', False)
2164 or 'format-spec' in self.params['compat_opts'])
2165
2166 return (
2167 'best/bestvideo+bestaudio' if prefer_best
2168 else 'bestvideo*+bestaudio/best' if not compat
2169 else 'bestvideo+bestaudio/best')
2170
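# Sketch of the defaults produced above, assuming ffmpeg can merge and no
# relevant compat options are set:
#
#   self._default_format_spec({}, download=True)                  # 'bestvideo*+bestaudio/best'
#   self._default_format_spec({'is_live': True}, download=True)   # 'best/bestvideo+bestaudio'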
2171 def build_format_selector(self, format_spec):
2172 def syntax_error(note, start):
2173 message = (
2174 'Invalid format specification: '
2175 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2176 return SyntaxError(message)
2177
2178 PICKFIRST = 'PICKFIRST'
2179 MERGE = 'MERGE'
2180 SINGLE = 'SINGLE'
2181 GROUP = 'GROUP'
2182 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2183
2184 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2185 'video': self.params.get('allow_multiple_video_streams', False)}
2186
2187 def _parse_filter(tokens):
2188 filter_parts = []
2189 for type, string_, start, _, _ in tokens:
2190 if type == tokenize.OP and string_ == ']':
2191 return ''.join(filter_parts)
2192 else:
2193 filter_parts.append(string_)
2194
2195 def _remove_unused_ops(tokens):
2196 # Remove operators that we don't use and join them with the surrounding strings.
2197 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2198 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2199 last_string, last_start, last_end, last_line = None, None, None, None
2200 for type, string_, start, end, line in tokens:
2201 if type == tokenize.OP and string_ == '[':
2202 if last_string:
2203 yield tokenize.NAME, last_string, last_start, last_end, last_line
2204 last_string = None
2205 yield type, string_, start, end, line
2206 # everything inside brackets will be handled by _parse_filter
2207 for type, string_, start, end, line in tokens:
2208 yield type, string_, start, end, line
2209 if type == tokenize.OP and string_ == ']':
2210 break
2211 elif type == tokenize.OP and string_ in ALLOWED_OPS:
2212 if last_string:
2213 yield tokenize.NAME, last_string, last_start, last_end, last_line
2214 last_string = None
2215 yield type, string_, start, end, line
2216 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2217 if not last_string:
2218 last_string = string_
2219 last_start = start
2220 last_end = end
2221 else:
2222 last_string += string_
2223 if last_string:
2224 yield tokenize.NAME, last_string, last_start, last_end, last_line
2225
2226 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2227 selectors = []
2228 current_selector = None
2229 for type, string_, start, _, _ in tokens:
2230 # ENCODING is only defined in Python 3.x
2231 if type == getattr(tokenize, 'ENCODING', None):
2232 continue
2233 elif type in [tokenize.NAME, tokenize.NUMBER]:
2234 current_selector = FormatSelector(SINGLE, string_, [])
2235 elif type == tokenize.OP:
2236 if string_ == ')':
2237 if not inside_group:
2238 # ')' will be handled by the parentheses group
2239 tokens.restore_last_token()
2240 break
2241 elif inside_merge and string_ in ['/', ',']:
2242 tokens.restore_last_token()
2243 break
2244 elif inside_choice and string_ == ',':
2245 tokens.restore_last_token()
2246 break
2247 elif string_ == ',':
2248 if not current_selector:
2249 raise syntax_error('"," must follow a format selector', start)
2250 selectors.append(current_selector)
2251 current_selector = None
2252 elif string_ == '/':
2253 if not current_selector:
2254 raise syntax_error('"/" must follow a format selector', start)
2255 first_choice = current_selector
2256 second_choice = _parse_format_selection(tokens, inside_choice=True)
2257 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2258 elif string_ == '[':
2259 if not current_selector:
2260 current_selector = FormatSelector(SINGLE, 'best', [])
2261 format_filter = _parse_filter(tokens)
2262 current_selector.filters.append(format_filter)
2263 elif string_ == '(':
2264 if current_selector:
2265 raise syntax_error('Unexpected "("', start)
2266 group = _parse_format_selection(tokens, inside_group=True)
2267 current_selector = FormatSelector(GROUP, group, [])
2268 elif string_ == '+':
2269 if not current_selector:
2270 raise syntax_error('Unexpected "+"', start)
2271 selector_1 = current_selector
2272 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2273 if not selector_2:
2274 raise syntax_error('Expected a selector', start)
2275 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2276 else:
2277 raise syntax_error(f'Operator not recognized: "{string_}"', start)
2278 elif type == tokenize.ENDMARKER:
2279 break
2280 if current_selector:
2281 selectors.append(current_selector)
2282 return selectors
2283
2284 def _merge(formats_pair):
2285 format_1, format_2 = formats_pair
2286
2287 formats_info = []
2288 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2289 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2290
2291 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2292 get_no_more = {'video': False, 'audio': False}
2293 for (i, fmt_info) in enumerate(formats_info):
2294 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2295 formats_info.pop(i)
2296 continue
2297 for aud_vid in ['audio', 'video']:
2298 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2299 if get_no_more[aud_vid]:
2300 formats_info.pop(i)
2301 break
2302 get_no_more[aud_vid] = True
2303
2304 if len(formats_info) == 1:
2305 return formats_info[0]
2306
2307 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2308 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2309
2310 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2311 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2312
2313 output_ext = get_compatible_ext(
2314 vcodecs=[f.get('vcodec') for f in video_fmts],
2315 acodecs=[f.get('acodec') for f in audio_fmts],
2316 vexts=[f['ext'] for f in video_fmts],
2317 aexts=[f['ext'] for f in audio_fmts],
2318 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2319 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2320
2321 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2322
2323 new_dict = {
2324 'requested_formats': formats_info,
2325 'format': '+'.join(filtered('format')),
2326 'format_id': '+'.join(filtered('format_id')),
2327 'ext': output_ext,
2328 'protocol': '+'.join(map(determine_protocol, formats_info)),
2329 'language': '+'.join(orderedSet(filtered('language'))) or None,
2330 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2331 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2332 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2333 }
2334
2335 if the_only_video:
2336 new_dict.update({
2337 'width': the_only_video.get('width'),
2338 'height': the_only_video.get('height'),
2339 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2340 'fps': the_only_video.get('fps'),
2341 'dynamic_range': the_only_video.get('dynamic_range'),
2342 'vcodec': the_only_video.get('vcodec'),
2343 'vbr': the_only_video.get('vbr'),
2344 'stretched_ratio': the_only_video.get('stretched_ratio'),
2345 'aspect_ratio': the_only_video.get('aspect_ratio'),
2346 })
2347
2348 if the_only_audio:
2349 new_dict.update({
2350 'acodec': the_only_audio.get('acodec'),
2351 'abr': the_only_audio.get('abr'),
2352 'asr': the_only_audio.get('asr'),
2353 'audio_channels': the_only_audio.get('audio_channels')
2354 })
2355
2356 return new_dict
2357
2358 def _check_formats(formats):
2359 if self.params.get('check_formats') == 'selected':
2360 yield from self._check_formats(formats)
2361 return
2362 elif (self.params.get('check_formats') is not None
2363 or self.params.get('allow_unplayable_formats')):
2364 yield from formats
2365 return
2366
2367 for f in formats:
2368 if f.get('has_drm') or f.get('__needs_testing'):
2369 yield from self._check_formats([f])
2370 else:
2371 yield f
2372
2373 def _build_selector_function(selector):
2374 if isinstance(selector, list): # ,
2375 fs = [_build_selector_function(s) for s in selector]
2376
2377 def selector_function(ctx):
2378 for f in fs:
2379 yield from f(ctx)
2380 return selector_function
2381
2382 elif selector.type == GROUP: # ()
2383 selector_function = _build_selector_function(selector.selector)
2384
2385 elif selector.type == PICKFIRST: # /
2386 fs = [_build_selector_function(s) for s in selector.selector]
2387
2388 def selector_function(ctx):
2389 for f in fs:
2390 picked_formats = list(f(ctx))
2391 if picked_formats:
2392 return picked_formats
2393 return []
2394
2395 elif selector.type == MERGE: # +
2396 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2397
2398 def selector_function(ctx):
2399 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2400 yield _merge(pair)
2401
2402 elif selector.type == SINGLE: # atom
2403 format_spec = selector.selector or 'best'
2404
2405 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2406 if format_spec == 'all':
2407 def selector_function(ctx):
2408 yield from _check_formats(ctx['formats'][::-1])
2409 elif format_spec == 'mergeall':
2410 def selector_function(ctx):
2411 formats = list(_check_formats(
2412 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2413 if not formats:
2414 return
2415 merged_format = formats[-1]
2416 for f in formats[-2::-1]:
2417 merged_format = _merge((merged_format, f))
2418 yield merged_format
2419
2420 else:
2421 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2422 mobj = re.match(
2423 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2424 format_spec)
2425 if mobj is not None:
2426 format_idx = int_or_none(mobj.group('n'), default=1)
2427 format_reverse = mobj.group('bw')[0] == 'b'
2428 format_type = (mobj.group('type') or [None])[0]
2429 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2430 format_modified = mobj.group('mod') is not None
2431
2432 format_fallback = not format_type and not format_modified # for b, w
2433 _filter_f = (
2434 (lambda f: f.get('%scodec' % format_type) != 'none')
2435 if format_type and format_modified # bv*, ba*, wv*, wa*
2436 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2437 if format_type # bv, ba, wv, wa
2438 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2439 if not format_modified # b, w
2440 else lambda f: True) # b*, w*
2441 filter_f = lambda f: _filter_f(f) and (
2442 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2443 else:
2444 if format_spec in self._format_selection_exts['audio']:
2445 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2446 elif format_spec in self._format_selection_exts['video']:
2447 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2448 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2449 elif format_spec in self._format_selection_exts['storyboards']:
2450 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2451 else:
2452 filter_f = lambda f: f.get('format_id') == format_spec # id
2453
2454 def selector_function(ctx):
2455 formats = list(ctx['formats'])
2456 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2457 if not matches:
2458 if format_fallback and ctx['incomplete_formats']:
2459 # for extractors with incomplete formats (audio-only (soundcloud)
2460 # or video-only (imgur)), best/worst will fall back to the
2461 # best/worst {video,audio}-only format
2462 matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
2463 elif separate_fallback and not ctx['has_merged_format']:
2464 # for compatibility with youtube-dl when there is no pre-merged format
2465 matches = list(filter(separate_fallback, formats))
2466 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2467 try:
2468 yield matches[format_idx - 1]
2469 except LazyList.IndexError:
2470 return
2471
2472 filters = [self._build_format_filter(f) for f in selector.filters]
2473
2474 def final_selector(ctx):
2475 ctx_copy = dict(ctx)
2476 for _filter in filters:
2477 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2478 return selector_function(ctx_copy)
2479 return final_selector
2480
2481 # HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
2482 # Prefix numbers with random letters to avoid it being classified as a number
2483 # See: https://github.com/yt-dlp/yt-dlp/pulls/8797
2484 # TODO: Implement parser not reliant on tokenize.tokenize
2485 prefix = ''.join(random.choices(string.ascii_letters, k=32))
2486 stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
2487 try:
2488 tokens = list(_remove_unused_ops(
2489 token._replace(string=token.string.replace(prefix, ''))
2490 for token in tokenize.tokenize(stream.readline)))
2491 except tokenize.TokenError:
2492 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2493
2494 class TokenIterator:
2495 def __init__(self, tokens):
2496 self.tokens = tokens
2497 self.counter = 0
2498
2499 def __iter__(self):
2500 return self
2501
2502 def __next__(self):
2503 if self.counter >= len(self.tokens):
2504 raise StopIteration()
2505 value = self.tokens[self.counter]
2506 self.counter += 1
2507 return value
2508
2509 next = __next__
2510
2511 def restore_last_token(self):
2512 self.counter -= 1
2513
2514 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2515 return _build_selector_function(parsed_selector)
2516
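# Sketch: the parser above turns a spec string into a selector function.
# Assuming a hypothetical `formats` list:
#
#   selector = self.build_format_selector('bv*[height<=1080]+ba/b')
#   chosen = list(selector({'formats': formats, 'incomplete_formats': False,
#                           'has_merged_format': False}))
#
# i.e. merge the best <=1080p video with the best audio, falling back to the
# best single file ('b') when no such pair can be selected.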
2517 def _calc_headers(self, info_dict, load_cookies=False):
2518 res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
2519 clean_headers(res)
2520
2521 if load_cookies: # For --load-info-json
2522 self._load_cookies(res.get('Cookie'), autoscope=info_dict['url']) # compat
2523 self._load_cookies(info_dict.get('cookies'), autoscope=False)
2524 # The `Cookie` header is removed to prevent leaks and unscoped cookies.
2525 # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
2526 res.pop('Cookie', None)
2527 cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
2528 if cookies:
2529 encoder = LenientSimpleCookie()
2530 values = []
2531 for cookie in cookies:
2532 _, value = encoder.value_encode(cookie.value)
2533 values.append(f'{cookie.name}={value}')
2534 if cookie.domain:
2535 values.append(f'Domain={cookie.domain}')
2536 if cookie.path:
2537 values.append(f'Path={cookie.path}')
2538 if cookie.secure:
2539 values.append('Secure')
2540 if cookie.expires:
2541 values.append(f'Expires={cookie.expires}')
2542 if cookie.version:
2543 values.append(f'Version={cookie.version}')
2544 info_dict['cookies'] = '; '.join(values)
2545
2546 if 'X-Forwarded-For' not in res:
2547 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2548 if x_forwarded_for_ip:
2549 res['X-Forwarded-For'] = x_forwarded_for_ip
2550
2551 return res
2552
2553 def _calc_cookies(self, url):
2554 self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2555 return self.cookiejar.get_cookie_header(url)
2556
2557 def _sort_thumbnails(self, thumbnails):
2558 thumbnails.sort(key=lambda t: (
2559 t.get('preference') if t.get('preference') is not None else -1,
2560 t.get('width') if t.get('width') is not None else -1,
2561 t.get('height') if t.get('height') is not None else -1,
2562 t.get('id') if t.get('id') is not None else '',
2563 t.get('url')))
2564
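# Sketch: thumbnails sort ascending by (preference, width, height, id, url),
# so the "best" candidate ends up last, e.g. (hypothetical values):
#
#   thumbs = [{'url': 'b', 'width': 1280}, {'url': 'a', 'width': 120}]
#   self._sort_thumbnails(thumbs)   # -> 120px entry first, 1280px entry last
#
# process_video_result below relies on this by defaulting 'thumbnail'
# to thumbnails[-1]['url'].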
2565 def _sanitize_thumbnails(self, info_dict):
2566 thumbnails = info_dict.get('thumbnails')
2567 if thumbnails is None:
2568 thumbnail = info_dict.get('thumbnail')
2569 if thumbnail:
2570 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2571 if not thumbnails:
2572 return
2573
2574 def check_thumbnails(thumbnails):
2575 for t in thumbnails:
2576 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2577 try:
2578 self.urlopen(HEADRequest(t['url']))
2579 except network_exceptions as err:
2580 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2581 continue
2582 yield t
2583
2584 self._sort_thumbnails(thumbnails)
2585 for i, t in enumerate(thumbnails):
2586 if t.get('id') is None:
2587 t['id'] = '%d' % i
2588 if t.get('width') and t.get('height'):
2589 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2590 t['url'] = sanitize_url(t['url'])
2591
2592 if self.params.get('check_formats') is True:
2593 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2594 else:
2595 info_dict['thumbnails'] = thumbnails
2596
2597 def _fill_common_fields(self, info_dict, final=True):
2598 # TODO: move sanitization here
2599 if final:
2600 title = info_dict['fulltitle'] = info_dict.get('title')
2601 if not title:
2602 if title == '':
2603 self.write_debug('Extractor gave empty title. Creating a generic title')
2604 else:
2605 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2606 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2607
2608 if info_dict.get('duration') is not None:
2609 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2610
2611 for ts_key, date_key in (
2612 ('timestamp', 'upload_date'),
2613 ('release_timestamp', 'release_date'),
2614 ('modified_timestamp', 'modified_date'),
2615 ):
2616 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2617 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2618 # see http://bugs.python.org/issue1646728)
2619 with contextlib.suppress(ValueError, OverflowError, OSError):
2620 upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
2621 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2622
2623 if not info_dict.get('release_year'):
2624 info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])}))
2625
2626 live_keys = ('is_live', 'was_live')
2627 live_status = info_dict.get('live_status')
2628 if live_status is None:
2629 for key in live_keys:
2630 if info_dict.get(key) is False:
2631 continue
2632 if info_dict.get(key):
2633 live_status = key
2634 break
2635 if all(info_dict.get(key) is False for key in live_keys):
2636 live_status = 'not_live'
2637 if live_status:
2638 info_dict['live_status'] = live_status
2639 for key in live_keys:
2640 if info_dict.get(key) is None:
2641 info_dict[key] = (live_status == key)
2642 if live_status == 'post_live':
2643 info_dict['was_live'] = True
2644
2645 # Auto generate title fields corresponding to the *_number fields when missing
2646 # in order to always have clean titles. This is very common for TV series.
2647 for field in ('chapter', 'season', 'episode'):
2648 if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2649 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2650
2651 for old_key, new_key in self._deprecated_multivalue_fields.items():
2652 if new_key in info_dict and old_key in info_dict:
2653 if '_version' not in info_dict: # HACK: Do not warn when using --load-info-json
2654 self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
2655 elif old_value := info_dict.get(old_key):
2656 info_dict[new_key] = old_value.split(', ')
2657 elif new_value := info_dict.get(new_key):
2658 info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
2659
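# Sketch of what the fills above produce (hypothetical values):
#
#   info = {'extractor': 'generic', 'id': 'x', 'title': 't',
#           'timestamp': 1700000000, 'episode_number': 3}
#   self._fill_common_fields(info)
#   # info['upload_date'] == '20231114' (derived in UTC)
#   # info['episode'] == 'Episode 3'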
2660 def _raise_pending_errors(self, info):
2661 err = info.pop('__pending_error', None)
2662 if err:
2663 self.report_error(err, tb=False)
2664
2665 def sort_formats(self, info_dict):
2666 formats = self._get_formats(info_dict)
2667 formats.sort(key=FormatSorter(
2668 self, info_dict.get('_format_sort_fields') or []).calculate_preference)
2669
2670 def process_video_result(self, info_dict, download=True):
2671 assert info_dict.get('_type', 'video') == 'video'
2672 self._num_videos += 1
2673
2674 if 'id' not in info_dict:
2675 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2676 elif not info_dict.get('id'):
2677 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2678
2679 def report_force_conversion(field, field_not, conversion):
2680 self.report_warning(
2681 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2682 % (field, field_not, conversion))
2683
2684 def sanitize_string_field(info, string_field):
2685 field = info.get(string_field)
2686 if field is None or isinstance(field, str):
2687 return
2688 report_force_conversion(string_field, 'a string', 'string')
2689 info[string_field] = str(field)
2690
2691 def sanitize_numeric_fields(info):
2692 for numeric_field in self._NUMERIC_FIELDS:
2693 field = info.get(numeric_field)
2694 if field is None or isinstance(field, (int, float)):
2695 continue
2696 report_force_conversion(numeric_field, 'numeric', 'int')
2697 info[numeric_field] = int_or_none(field)
2698
2699 sanitize_string_field(info_dict, 'id')
2700 sanitize_numeric_fields(info_dict)
2701 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2702 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2703 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2704 self.report_warning('"duration" field is negative, there is an error in extractor')
2705
2706 chapters = info_dict.get('chapters') or []
2707 if chapters and chapters[0].get('start_time'):
2708 chapters.insert(0, {'start_time': 0})
2709
2710 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2711 for idx, (prev, current, next_) in enumerate(zip(
2712 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2713 if current.get('start_time') is None:
2714 current['start_time'] = prev.get('end_time')
2715 if not current.get('end_time'):
2716 current['end_time'] = next_.get('start_time')
2717 if not current.get('title'):
2718 current['title'] = f'<Untitled Chapter {idx}>'
2719
2720 if 'playlist' not in info_dict:
2721 # It isn't part of a playlist
2722 info_dict['playlist'] = None
2723 info_dict['playlist_index'] = None
2724
2725 self._sanitize_thumbnails(info_dict)
2726
2727 thumbnail = info_dict.get('thumbnail')
2728 thumbnails = info_dict.get('thumbnails')
2729 if thumbnail:
2730 info_dict['thumbnail'] = sanitize_url(thumbnail)
2731 elif thumbnails:
2732 info_dict['thumbnail'] = thumbnails[-1]['url']
2733
2734 if info_dict.get('display_id') is None and 'id' in info_dict:
2735 info_dict['display_id'] = info_dict['id']
2736
2737 self._fill_common_fields(info_dict)
2738
2739 for cc_kind in ('subtitles', 'automatic_captions'):
2740 cc = info_dict.get(cc_kind)
2741 if cc:
2742 for _, subtitle in cc.items():
2743 for subtitle_format in subtitle:
2744 if subtitle_format.get('url'):
2745 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2746 if subtitle_format.get('ext') is None:
2747 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2748
2749 automatic_captions = info_dict.get('automatic_captions')
2750 subtitles = info_dict.get('subtitles')
2751
2752 info_dict['requested_subtitles'] = self.process_subtitles(
2753 info_dict['id'], subtitles, automatic_captions)
2754
2755 formats = self._get_formats(info_dict)
2756
2757 # Backward compatibility with InfoExtractor._sort_formats
2758 field_preference = (formats or [{}])[0].pop('__sort_fields', None)
2759 if field_preference:
2760 info_dict['_format_sort_fields'] = field_preference
2761
2762 info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
2763 f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
2764 if not self.params.get('allow_unplayable_formats'):
2765 formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
2766
2767 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2768 self.report_warning(
2769 f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2770 'only images are available for download. Use --list-formats to see them'.capitalize())
2771
2772 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2773 if not get_from_start:
2774 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2775 if info_dict.get('is_live') and formats:
2776 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2777 if get_from_start and not formats:
2778 self.raise_no_formats(info_dict, msg=(
2779 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2780 'If you want to download from the current time, use --no-live-from-start'))
2781
2782 def is_wellformed(f):
2783 url = f.get('url')
2784 if not url:
2785 self.report_warning(
2786 '"url" field is missing or empty - skipping format, '
2787 'there is an error in extractor')
2788 return False
2789 if isinstance(url, bytes):
2790 sanitize_string_field(f, 'url')
2791 return True
2792
2793 # Filter out malformed formats for better extraction robustness
2794 formats = list(filter(is_wellformed, formats or []))
2795
2796 if not formats:
2797 self.raise_no_formats(info_dict)
2798
2799 for format in formats:
2800 sanitize_string_field(format, 'format_id')
2801 sanitize_numeric_fields(format)
2802 format['url'] = sanitize_url(format['url'])
2803 if format.get('ext') is None:
2804 format['ext'] = determine_ext(format['url']).lower()
2805 if format.get('protocol') is None:
2806 format['protocol'] = determine_protocol(format)
2807 if format.get('resolution') is None:
2808 format['resolution'] = self.format_resolution(format, default=None)
2809 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2810 format['dynamic_range'] = 'SDR'
2811 if format.get('aspect_ratio') is None:
2812 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
2813 # For fragmented formats, "tbr" is often max bitrate and not average
2814 if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url'))
2815 and info_dict.get('duration') and format.get('tbr')
2816 and not format.get('filesize') and not format.get('filesize_approx')):
2817 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
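# For instance (values assumed), duration=100 s and tbr=5000 KBit/s give
# filesize_approx = int(100 * 5000 * 128) = 64000000 bytes, since 1024/8 = 128
# converts KBit/s (treating 1 KBit as 1024 bits) into bytes per second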
2818 format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)
2819
2820 # Safeguard against old/insecure infojson when using --load-info-json
2821 if info_dict.get('http_headers'):
2822 info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
2823 info_dict['http_headers'].pop('Cookie', None)
2824
2825 # This is copied to http_headers by the above _calc_headers and can now be removed
2826 if '__x_forwarded_for_ip' in info_dict:
2827 del info_dict['__x_forwarded_for_ip']
2828
2829 self.sort_formats({
2830 'formats': formats,
2831 '_format_sort_fields': info_dict.get('_format_sort_fields')
2832 })
2833
2834 # Sanitize and group by format_id
2835 formats_dict = {}
2836 for i, format in enumerate(formats):
2837 if not format.get('format_id'):
2838 format['format_id'] = str(i)
2839 else:
2840 # Sanitize format_id from characters used in format selector expression
2841 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2842 formats_dict.setdefault(format['format_id'], []).append(format)
2843
2844 # Make sure all formats have unique format_id
2845 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2846 for format_id, ambiguous_formats in formats_dict.items():
2847 ambiguous_id = len(ambiguous_formats) > 1
2848 for i, format in enumerate(ambiguous_formats):
2849 if ambiguous_id:
2850 format['format_id'] = '%s-%d' % (format_id, i)
2851 # Ensure there is no conflict between id and ext in format selection
2852 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2853 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2854 format['format_id'] = 'f%s' % format['format_id']
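# As an example of the two steps above (ids assumed): two formats both named
# 'hls,720p' are first sanitized to 'hls_720p', then disambiguated into
# 'hls_720p-0' and 'hls_720p-1'; an id colliding with a common extension,
# e.g. 'mp4', becomes 'fmp4'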
2855
2856 if format.get('format') is None:
2857 format['format'] = '{id} - {res}{note}'.format(
2858 id=format['format_id'],
2859 res=self.format_resolution(format),
2860 note=format_field(format, 'format_note', ' (%s)'),
2861 )
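# e.g. a format with id '137', resolution '1920x1080' and format_note '1080p'
# gets the display name '137 - 1920x1080 (1080p)'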
2862
2863 if self.params.get('check_formats') is True:
2864 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2865
2866 if not formats or formats[0] is not info_dict:
2867 # only set the 'formats' field if the original info_dict listed formats;
2868 # otherwise we'd end up with a circular reference: the first (and only)
2869 # element of the 'formats' field in info_dict would be info_dict itself,
2870 # which can't be exported to json
2871 info_dict['formats'] = formats
2872
2873 info_dict, _ = self.pre_process(info_dict)
2874
2875 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2876 return info_dict
2877
2878 self.post_extract(info_dict)
2879 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2880
2881 # The pre-processors may have modified the formats
2882 formats = self._get_formats(info_dict)
2883
2884 list_only = self.params.get('simulate') == 'list_only'
2885 interactive_format_selection = not list_only and self.format_selector == '-'
2886 if self.params.get('list_thumbnails'):
2887 self.list_thumbnails(info_dict)
2888 if self.params.get('listsubtitles'):
2889 if 'automatic_captions' in info_dict:
2890 self.list_subtitles(
2891 info_dict['id'], automatic_captions, 'automatic captions')
2892 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2893 if self.params.get('listformats') or interactive_format_selection:
2894 self.list_formats(info_dict)
2895 if list_only:
2896 # Without this printing, -F --print-json will not work
2897 self.__forced_printings(info_dict)
2898 return info_dict
2899
2900 format_selector = self.format_selector
2901 while True:
2902 if interactive_format_selection:
2903 req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
2904 + '(Press ENTER for default, or Ctrl+C to quit)'
2905 + self._format_screen(': ', self.Styles.EMPHASIS))
2906 try:
2907 format_selector = self.build_format_selector(req_format) if req_format else None
2908 except SyntaxError as err:
2909 self.report_error(err, tb=False, is_error=False)
2910 continue
2911
2912 if format_selector is None:
2913 req_format = self._default_format_spec(info_dict, download=download)
2914 self.write_debug(f'Default format spec: {req_format}')
2915 format_selector = self.build_format_selector(req_format)
2916
2917 formats_to_download = list(format_selector({
2918 'formats': formats,
2919 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2920 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
2921 or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
2922 }))
2923 if interactive_format_selection and not formats_to_download:
2924 self.report_error('Requested format is not available', tb=False, is_error=False)
2925 continue
2926 break
2927
2928 if not formats_to_download:
2929 if not self.params.get('ignore_no_formats_error'):
2930 raise ExtractorError(
2931 'Requested format is not available. Use --list-formats for a list of available formats',
2932 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2933 self.report_warning('Requested format is not available')
2934 # Process what we can, even without any available formats.
2935 formats_to_download = [{}]
2936
2937 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
2938 best_format, downloaded_formats = formats_to_download[-1], []
2939 if download:
2940 if best_format and requested_ranges:
2941 def to_screen(*msg):
2942 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2943
2944 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2945 (f['format_id'] for f in formats_to_download))
2946 if requested_ranges != ({}, ):
2947 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2948 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
2949 max_downloads_reached = False
2950
2951 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
2952 new_info = self._copy_infodict(info_dict)
2953 new_info.update(fmt)
2954 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2955 end_time = offset + min(chapter.get('end_time', duration), duration)
2956 # duration may not be accurate, so allow deviations of <1 sec
2957 if end_time == float('inf') or end_time > offset + duration + 1:
2958 end_time = None
2959 if chapter or offset:
2960 new_info.update({
2961 'section_start': offset + chapter.get('start_time', 0),
2962 'section_end': end_time,
2963 'section_title': chapter.get('title'),
2964 'section_number': chapter.get('index'),
2965 })
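# For example (values assumed), a requested range {'start_time': 10,
# 'end_time': 20} on a non-sectioned video yields section_start=10 and
# section_end=20; if neither the range end nor the duration is known,
# end_time stays inf and section_end becomes None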
2966 downloaded_formats.append(new_info)
2967 try:
2968 self.process_info(new_info)
2969 except MaxDownloadsReached:
2970 max_downloads_reached = True
2971 self._raise_pending_errors(new_info)
2972 # Remove copied info
2973 for key, val in tuple(new_info.items()):
2974 if info_dict.get(key) == val:
2975 new_info.pop(key)
2976 if max_downloads_reached:
2977 break
2978
2979 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2980 assert write_archive.issubset({True, False, 'ignore'})
2981 if True in write_archive and False not in write_archive:
2982 self.record_download_archive(info_dict)
2983
2984 info_dict['requested_downloads'] = downloaded_formats
2985 info_dict = self.run_all_pps('after_video', info_dict)
2986 if max_downloads_reached:
2987 raise MaxDownloadsReached()
2988
2989 # We update the info dict with the selected best quality format (backwards compatibility)
2990 info_dict.update(best_format)
2991 return info_dict
2992
2993 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2994 """Select the requested subtitles and their format"""
2995 available_subs, normal_sub_langs = {}, []
2996 if normal_subtitles and self.params.get('writesubtitles'):
2997 available_subs.update(normal_subtitles)
2998 normal_sub_langs = tuple(normal_subtitles.keys())
2999 if automatic_captions and self.params.get('writeautomaticsub'):
3000 for lang, cap_info in automatic_captions.items():
3001 if lang not in available_subs:
3002 available_subs[lang] = cap_info
3003
3004 if not available_subs or (
3005 not self.params.get('writesubtitles')
3006 and not self.params.get('writeautomaticsub')):
3007 return None
3008
3009 all_sub_langs = tuple(available_subs.keys())
3010 if self.params.get('allsubtitles', False):
3011 requested_langs = all_sub_langs
3012 elif self.params.get('subtitleslangs', False):
3013 try:
3014 requested_langs = orderedSet_from_options(
3015 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
3016 except re.error as e:
3017 raise ValueError(f'Invalid regex for subtitleslangs: {e.pattern}')
3018 else:
3019 requested_langs = LazyList(itertools.chain(
3020 ['en'] if 'en' in normal_sub_langs else [],
3021 filter(lambda f: f.startswith('en'), normal_sub_langs),
3022 ['en'] if 'en' in all_sub_langs else [],
3023 filter(lambda f: f.startswith('en'), all_sub_langs),
3024 normal_sub_langs, all_sub_langs,
3025 ))[:1]
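# i.e. when no languages were requested, the first hit of this preference
# chain is used: 'en' from normal subs, any 'en*' normal sub, 'en' or 'en*'
# from all available subs (including automatic captions), then any normal
# sub, then anything available at all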
3026 if requested_langs:
3027 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
3028
3029 formats_query = self.params.get('subtitlesformat', 'best')
3030 formats_preference = formats_query.split('/') if formats_query else []
3031 subs = {}
3032 for lang in requested_langs:
3033 formats = available_subs.get(lang)
3034 if formats is None:
3035 self.report_warning(f'{lang} subtitles not available for {video_id}')
3036 continue
3037 for ext in formats_preference:
3038 if ext == 'best':
3039 f = formats[-1]
3040 break
3041 matches = list(filter(lambda f: f['ext'] == ext, formats))
3042 if matches:
3043 f = matches[-1]
3044 break
3045 else:
3046 f = formats[-1]
3047 self.report_warning(
3048 'No subtitle format found matching "%s" for language %s, '
3049 'using %s' % (formats_query, lang, f['ext']))
3050 subs[lang] = f
3051 return subs
3052
3053 def _forceprint(self, key, info_dict):
3054 if info_dict is None:
3055 return
3056 info_copy = info_dict.copy()
3057 info_copy.setdefault('filename', self.prepare_filename(info_dict))
3058 if info_dict.get('requested_formats') is not None:
3059 # For RTMP URLs, also include the playpath
3060 info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
3061 elif info_dict.get('url'):
3062 info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
3063 info_copy['formats_table'] = self.render_formats_table(info_dict)
3064 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
3065 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
3066 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
3067
3068 def format_tmpl(tmpl):
3069 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
3070 if not mobj:
3071 return tmpl
3072
3073 fmt = '%({})s'
3074 if tmpl.startswith('{'):
3075 tmpl, fmt = f'.{tmpl}', '%({})j'
3076 if tmpl.endswith('='):
3077 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
3078 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
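# A few examples of what format_tmpl produces: 'title,id' expands to
# '%(title)s\n%(id)s', 'id=' to 'id = %(id)#j', and '{id,title}' to
# '%(.{id,title})j'; anything that doesn't match the pattern is left as-is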
3079
3080 for tmpl in self.params['forceprint'].get(key, []):
3081 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
3082
3083 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
3084 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
3085 tmpl = format_tmpl(tmpl)
3086 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
3087 if self._ensure_dir_exists(filename):
3088 with open(filename, 'a', encoding='utf-8', newline='') as f:
3089 f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
3090
3091 return info_copy
3092
3093 def __forced_printings(self, info_dict, filename=None, incomplete=True):
3094 if (self.params.get('forcejson')
3095 or self.params['forceprint'].get('video')
3096 or self.params['print_to_file'].get('video')):
3097 self.post_extract(info_dict)
3098 if filename:
3099 info_dict['filename'] = filename
3100 info_copy = self._forceprint('video', info_dict)
3101
3102 def print_field(field, actual_field=None, optional=False):
3103 if actual_field is None:
3104 actual_field = field
3105 if self.params.get(f'force{field}') and (
3106 info_copy.get(field) is not None or (not optional and not incomplete)):
3107 self.to_stdout(info_copy[actual_field])
3108
3109 print_field('title')
3110 print_field('id')
3111 print_field('url', 'urls')
3112 print_field('thumbnail', optional=True)
3113 print_field('description', optional=True)
3114 print_field('filename')
3115 if self.params.get('forceduration') and info_copy.get('duration') is not None:
3116 self.to_stdout(formatSeconds(info_copy['duration']))
3117 print_field('format')
3118
3119 if self.params.get('forcejson'):
3120 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
3121
3122 def dl(self, name, info, subtitle=False, test=False):
3123 if not info.get('url'):
3124 self.raise_no_formats(info, True)
3125
3126 if test:
3127 verbose = self.params.get('verbose')
3128 params = {
3129 'test': True,
3130 'quiet': self.params.get('quiet') or not verbose,
3131 'verbose': verbose,
3132 'noprogress': not verbose,
3133 'nopart': True,
3134 'skip_unavailable_fragments': False,
3135 'keep_fragments': False,
3136 'overwrites': True,
3137 '_no_ytdl_file': True,
3138 }
3139 else:
3140 params = self.params
3141 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
3142 if not test:
3143 for ph in self._progress_hooks:
3144 fd.add_progress_hook(ph)
3145 urls = '", "'.join(
3146 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
3147 for f in info.get('requested_formats', []) or [info])
3148 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
3149
3150 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
3151 # But it may contain objects that are not deep-copyable
3152 new_info = self._copy_infodict(info)
3153 if new_info.get('http_headers') is None:
3154 new_info['http_headers'] = self._calc_headers(new_info)
3155 return fd.download(name, new_info, subtitle)
3156
3157 def existing_file(self, filepaths, *, default_overwrite=True):
3158 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
3159 if existing_files and not self.params.get('overwrites', default_overwrite):
3160 return existing_files[0]
3161
3162 for file in existing_files:
3163 self.report_file_delete(file)
3164 os.remove(file)
3165 return None
3166
3167 def process_info(self, info_dict):
3168 """Process a single resolved IE result. (Modifies it in-place)"""
3169
3170 assert info_dict.get('_type', 'video') == 'video'
3171 original_infodict = info_dict
3172
3173 if 'format' not in info_dict and 'ext' in info_dict:
3174 info_dict['format'] = info_dict['ext']
3175
3176 if self._match_entry(info_dict) is not None:
3177 info_dict['__write_download_archive'] = 'ignore'
3178 return
3179
3180 # Does nothing under normal operation - for backward compatibility of process_info
3181 self.post_extract(info_dict)
3182
3183 def replace_info_dict(new_info):
3184 nonlocal info_dict
3185 if new_info == info_dict:
3186 return
3187 info_dict.clear()
3188 info_dict.update(new_info)
3189
3190 new_info, _ = self.pre_process(info_dict, 'video')
3191 replace_info_dict(new_info)
3192 self._num_downloads += 1
3193
3194 # info_dict['_filename'] needs to be set for backward compatibility
3195 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3196 temp_filename = self.prepare_filename(info_dict, 'temp')
3197 files_to_move = {}
3198
3199 # Forced printings
3200 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
3201
3202 def check_max_downloads():
3203 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3204 raise MaxDownloadsReached()
3205
3206 if self.params.get('simulate'):
3207 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3208 check_max_downloads()
3209 return
3210
3211 if full_filename is None:
3212 return
3213 if not self._ensure_dir_exists(encodeFilename(full_filename)):
3214 return
3215 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
3216 return
3217
3218 if self._write_description('video', info_dict,
3219 self.prepare_filename(info_dict, 'description')) is None:
3220 return
3221
3222 sub_files = self._write_subtitles(info_dict, temp_filename)
3223 if sub_files is None:
3224 return
3225 files_to_move.update(dict(sub_files))
3226
3227 thumb_files = self._write_thumbnails(
3228 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3229 if thumb_files is None:
3230 return
3231 files_to_move.update(dict(thumb_files))
3232
3233 infofn = self.prepare_filename(info_dict, 'infojson')
3234 _infojson_written = self._write_info_json('video', info_dict, infofn)
3235 if _infojson_written:
3236 info_dict['infojson_filename'] = infofn
3237 # For backward compatibility, even though it was a private field
3238 info_dict['__infojson_filename'] = infofn
3239 elif _infojson_written is None:
3240 return
3241
3242 # Note: Annotations are deprecated
3243 annofn = None
3244 if self.params.get('writeannotations', False):
3245 annofn = self.prepare_filename(info_dict, 'annotation')
3246 if annofn:
3247 if not self._ensure_dir_exists(encodeFilename(annofn)):
3248 return
3249 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
3250 self.to_screen('[info] Video annotations are already present')
3251 elif not info_dict.get('annotations'):
3252 self.report_warning('There are no annotations to write.')
3253 else:
3254 try:
3255 self.to_screen('[info] Writing video annotations to: ' + annofn)
3256 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
3257 annofile.write(info_dict['annotations'])
3258 except (KeyError, TypeError):
3259 self.report_warning('There are no annotations to write.')
3260 except OSError:
3261 self.report_error('Cannot write annotations file: ' + annofn)
3262 return
3263
3264 # Write internet shortcut files
3265 def _write_link_file(link_type):
3266 url = try_get(info_dict['webpage_url'], iri_to_uri)
3267 if not url:
3268 self.report_warning(
3269 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3270 return True
3271 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3272 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3273 return False
3274 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3275 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3276 return True
3277 try:
3278 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3279 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3280 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3281 template_vars = {'url': url}
3282 if link_type == 'desktop':
3283 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3284 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3285 except OSError:
3286 self.report_error(f'Cannot write internet shortcut {linkfn}')
3287 return False
3288 return True
3289
3290 write_links = {
3291 'url': self.params.get('writeurllink'),
3292 'webloc': self.params.get('writewebloclink'),
3293 'desktop': self.params.get('writedesktoplink'),
3294 }
3295 if self.params.get('writelink'):
3296 link_type = ('webloc' if sys.platform == 'darwin'
3297 else 'desktop' if sys.platform.startswith('linux')
3298 else 'url')
3299 write_links[link_type] = True
3300
3301 if any(should_write and not _write_link_file(link_type)
3302 for link_type, should_write in write_links.items()):
3303 return
3304
3305 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3306 replace_info_dict(new_info)
3307
3308 if self.params.get('skip_download'):
3309 info_dict['filepath'] = temp_filename
3310 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3311 info_dict['__files_to_move'] = files_to_move
3312 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3313 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3314 else:
3315 # Download
3316 info_dict.setdefault('__postprocessors', [])
3317 try:
3318
3319 def existing_video_file(*filepaths):
3320 ext = info_dict.get('ext')
3321 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3322 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3323 default_overwrite=False)
3324 if file:
3325 info_dict['ext'] = os.path.splitext(file)[1][1:]
3326 return file
3327
3328 fd, success = None, True
3329 if info_dict.get('protocol') or info_dict.get('url'):
3330 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3331 if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3332 info_dict.get('section_start') or info_dict.get('section_end')):
3333 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3334 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3335 self.report_error(f'{msg}. Aborting')
3336 return
3337
3338 if info_dict.get('requested_formats') is not None:
3339 old_ext = info_dict['ext']
3340 if self.params.get('merge_output_format') is None:
3341 if (info_dict['ext'] == 'webm'
3342 and info_dict.get('thumbnails')
3343 # check with type instead of pp_key, __name__, or isinstance
3344 # since we don't want any custom PPs to trigger this
3345 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
3346 info_dict['ext'] = 'mkv'
3347 self.report_warning(
3348 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3349 new_ext = info_dict['ext']
3350
3351 def correct_ext(filename, ext=new_ext):
3352 if filename == '-':
3353 return filename
3354 filename_real_ext = os.path.splitext(filename)[1][1:]
3355 filename_wo_ext = (
3356 os.path.splitext(filename)[0]
3357 if filename_real_ext in (old_ext, new_ext)
3358 else filename)
3359 return f'{filename_wo_ext}.{ext}'
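# e.g. with old_ext='webm' and new_ext='mkv', correct_ext('video.webm')
# returns 'video.mkv', while a name with an unrelated extension such as
# 'video.part' becomes 'video.part.mkv'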
3360
3361 # Ensure filename always has a correct extension for successful merge
3362 full_filename = correct_ext(full_filename)
3363 temp_filename = correct_ext(temp_filename)
3364 dl_filename = existing_video_file(full_filename, temp_filename)
3365
3366 info_dict['__real_download'] = False
3367 # NOTE: Copy so that original format dicts are not modified
3368 info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
3369
3370 merger = FFmpegMergerPP(self)
3371 downloaded = []
3372 if dl_filename is not None:
3373 self.report_file_already_downloaded(dl_filename)
3374 elif fd:
3375 for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
3376 f['filepath'] = fname = prepend_extension(
3377 correct_ext(temp_filename, info_dict['ext']),
3378 'f%s' % f['format_id'], info_dict['ext'])
3379 downloaded.append(fname)
3380 info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
3381 success, real_download = self.dl(temp_filename, info_dict)
3382 info_dict['__real_download'] = real_download
3383 else:
3384 if self.params.get('allow_unplayable_formats'):
3385 self.report_warning(
3386 'You have requested merging of multiple formats '
3387 'while also allowing unplayable formats to be downloaded. '
3388 'The formats won\'t be merged to prevent data corruption.')
3389 elif not merger.available:
3390 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3391 if not self.params.get('ignoreerrors'):
3392 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3393 return
3394 self.report_warning(f'{msg}. The formats won\'t be merged')
3395
3396 if temp_filename == '-':
3397 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3398 else 'but the formats are incompatible for simultaneous download' if merger.available
3399 else 'but ffmpeg is not installed')
3400 self.report_warning(
3401 f'You have requested downloading multiple formats to stdout {reason}. '
3402 'The formats will be streamed one after the other')
3403 fname = temp_filename
3404 for f in info_dict['requested_formats']:
3405 new_info = dict(info_dict)
3406 del new_info['requested_formats']
3407 new_info.update(f)
3408 if temp_filename != '-':
3409 fname = prepend_extension(
3410 correct_ext(temp_filename, new_info['ext']),
3411 'f%s' % f['format_id'], new_info['ext'])
3412 if not self._ensure_dir_exists(fname):
3413 return
3414 f['filepath'] = fname
3415 downloaded.append(fname)
3416 partial_success, real_download = self.dl(fname, new_info)
3417 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3418 success = success and partial_success
3419
3420 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3421 info_dict['__postprocessors'].append(merger)
3422 info_dict['__files_to_merge'] = downloaded
3423 # Even if nothing was downloaded, the merge itself only happens now
3424 info_dict['__real_download'] = True
3425 else:
3426 for file in downloaded:
3427 files_to_move[file] = None
3428 else:
3429 # Just a single file
3430 dl_filename = existing_video_file(full_filename, temp_filename)
3431 if dl_filename is None or dl_filename == temp_filename:
3432 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3433 # So we should try to resume the download
3434 success, real_download = self.dl(temp_filename, info_dict)
3435 info_dict['__real_download'] = real_download
3436 else:
3437 self.report_file_already_downloaded(dl_filename)
3438
3439 dl_filename = dl_filename or temp_filename
3440 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3441
3442 except network_exceptions as err:
3443 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3444 return
3445 except OSError as err:
3446 raise UnavailableVideoError(err)
3447 except (ContentTooShortError, ) as err:
3448 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3449 return
3450
3451 self._raise_pending_errors(info_dict)
3452 if success and full_filename != '-':
3453
3454 def fixup():
3455 do_fixup = True
3456 fixup_policy = self.params.get('fixup')
3457 vid = info_dict['id']
3458
3459 if fixup_policy in ('ignore', 'never'):
3460 return
3461 elif fixup_policy == 'warn':
3462 do_fixup = 'warn'
3463 elif fixup_policy != 'force':
3464 assert fixup_policy in ('detect_or_warn', None)
3465 if not info_dict.get('__real_download'):
3466 do_fixup = False
3467
3468 def ffmpeg_fixup(cndn, msg, cls):
3469 if not (do_fixup and cndn):
3470 return
3471 elif do_fixup == 'warn':
3472 self.report_warning(f'{vid}: {msg}')
3473 return
3474 pp = cls(self)
3475 if pp.available:
3476 info_dict['__postprocessors'].append(pp)
3477 else:
3478 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3479
3480 stretched_ratio = info_dict.get('stretched_ratio')
3481 ffmpeg_fixup(stretched_ratio not in (1, None),
3482 f'Non-uniform pixel ratio {stretched_ratio}',
3483 FFmpegFixupStretchedPP)
3484
3485 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3486 downloader = downloader.FD_NAME if downloader else None
3487
3488 ext = info_dict.get('ext')
3489 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3490 isinstance(pp, FFmpegVideoConvertorPP)
3491 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3492 ) for pp in self._pps['post_process'])
3493
3494 if not postprocessed_by_ffmpeg:
3495 ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
3496 and info_dict.get('container') == 'm4a_dash',
3497 'writing DASH m4a. Only some players support this container',
3498 FFmpegFixupM4aPP)
3499 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3500 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3501 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3502 FFmpegFixupM3u8PP)
3503 ffmpeg_fixup(downloader == 'dashsegments'
3504 and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
3505 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3506
3507 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3508 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3509
3510 fixup()
3511 try:
3512 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3513 except PostProcessingError as err:
3514 self.report_error('Postprocessing: %s' % str(err))
3515 return
3516 try:
3517 for ph in self._post_hooks:
3518 ph(info_dict['filepath'])
3519 except Exception as err:
3520 self.report_error('post hooks: %s' % str(err))
3521 return
3522 info_dict['__write_download_archive'] = True
3523
3524 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
3525 if self.params.get('force_write_download_archive'):
3526 info_dict['__write_download_archive'] = True
3527 check_max_downloads()
3528
3529 def __download_wrapper(self, func):
3530 @functools.wraps(func)
3531 def wrapper(*args, **kwargs):
3532 try:
3533 res = func(*args, **kwargs)
3534 except UnavailableVideoError as e:
3535 self.report_error(e)
3536 except DownloadCancelled as e:
3537 self.to_screen(f'[info] {e}')
3538 if not self.params.get('break_per_url'):
3539 raise
3540 self._num_downloads = 0
3541 else:
3542 if self.params.get('dump_single_json', False):
3543 self.post_extract(res)
3544 self.to_stdout(json.dumps(self.sanitize_info(res)))
3545 return wrapper
3546
3547 def download(self, url_list):
3548 """Download a given list of URLs."""
3549 url_list = variadic(url_list) # Passing a single URL is a common mistake
3550 outtmpl = self.params['outtmpl']['default']
3551 if (len(url_list) > 1
3552 and outtmpl != '-'
3553 and '%' not in outtmpl
3554 and self.params.get('max_downloads') != 1):
3555 raise SameFileError(outtmpl)
3556
3557 for url in url_list:
3558 self.__download_wrapper(self.extract_info)(
3559 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3560
3561 return self._download_retcode
3562
3563 def download_with_info_file(self, info_filename):
3564 with contextlib.closing(fileinput.FileInput(
3565 [info_filename], mode='r',
3566 openhook=fileinput.hook_encoded('utf-8'))) as f:
3567 # FileInput doesn't have a read method, so we can't call json.load
3568 infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
3569 for info in variadic(json.loads('\n'.join(f)))]
3570 for info in infos:
3571 try:
3572 self.__download_wrapper(self.process_ie_result)(info, download=True)
3573 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3574 if not isinstance(e, EntryNotInPlaylist):
3575 self.to_stderr('\r')
3576 webpage_url = info.get('webpage_url')
3577 if webpage_url is None:
3578 raise
3579 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3580 self.download([webpage_url])
3581 except ExtractorError as e:
3582 self.report_error(e)
3583 return self._download_retcode
3584
3585 @staticmethod
3586 def sanitize_info(info_dict, remove_private_keys=False):
3587 ''' Sanitize the infodict for converting to json '''
3588 if info_dict is None:
3589 return info_dict
3590 info_dict.setdefault('epoch', int(time.time()))
3591 info_dict.setdefault('_type', 'video')
3592 info_dict.setdefault('_version', {
3593 'version': __version__,
3594 'current_git_head': current_git_head(),
3595 'release_git_head': RELEASE_GIT_HEAD,
3596 'repository': ORIGIN,
3597 })
3598
3599 if remove_private_keys:
3600 reject = lambda k, v: v is None or k.startswith('__') or k in {
3601 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3602 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3603 'playlist_autonumber',
3604 }
3605 else:
3606 reject = lambda k, v: False
3607
3608 def filter_fn(obj):
3609 if isinstance(obj, dict):
3610 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3611 elif isinstance(obj, (list, tuple, set, LazyList)):
3612 return list(map(filter_fn, obj))
3613 elif obj is None or isinstance(obj, (str, int, float, bool)):
3614 return obj
3615 else:
3616 return repr(obj)
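# i.e. JSON-serializable values pass through unchanged while anything else
# (e.g. a datetime object) is replaced by its repr() string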
3617
3618 return filter_fn(info_dict)
3619
3620 @staticmethod
3621 def filter_requested_info(info_dict, actually_filter=True):
3622 ''' Alias of sanitize_info for backward compatibility '''
3623 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3624
3625 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3626 for filename in set(filter(None, files_to_delete)):
3627 if msg:
3628 self.to_screen(msg % filename)
3629 try:
3630 os.remove(filename)
3631 except OSError:
3632 self.report_warning(f'Unable to delete file {filename}')
3633 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3634 del info['__files_to_move'][filename]
3635
3636 @staticmethod
3637 def post_extract(info_dict):
3638 def actual_post_extract(info_dict):
3639 if info_dict.get('_type') in ('playlist', 'multi_video'):
3640 for video_dict in info_dict.get('entries', {}):
3641 actual_post_extract(video_dict or {})
3642 return
3643
3644 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3645 info_dict.update(post_extractor())
3646
3647 actual_post_extract(info_dict or {})
3648
3649 def run_pp(self, pp, infodict):
3650 files_to_delete = []
3651 if '__files_to_move' not in infodict:
3652 infodict['__files_to_move'] = {}
3653 try:
3654 files_to_delete, infodict = pp.run(infodict)
3655 except PostProcessingError as e:
3656 # Must be True and not 'only_download'
3657 if self.params.get('ignoreerrors') is True:
3658 self.report_error(e)
3659 return infodict
3660 raise
3661
3662 if not files_to_delete:
3663 return infodict
3664 if self.params.get('keepvideo', False):
3665 for f in files_to_delete:
3666 infodict['__files_to_move'].setdefault(f, '')
3667 else:
3668 self._delete_downloaded_files(
3669 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3670 return infodict
3671
3672 def run_all_pps(self, key, info, *, additional_pps=None):
3673 if key != 'video':
3674 self._forceprint(key, info)
3675 for pp in (additional_pps or []) + self._pps[key]:
3676 info = self.run_pp(pp, info)
3677 return info
3678
3679 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3680 info = dict(ie_info)
3681 info['__files_to_move'] = files_to_move or {}
3682 try:
3683 info = self.run_all_pps(key, info)
3684 except PostProcessingError as err:
3685 msg = f'Preprocessing: {err}'
3686 info.setdefault('__pending_error', msg)
3687 self.report_error(msg, is_error=False)
3688 return info, info.pop('__files_to_move', None)
3689
3690 def post_process(self, filename, info, files_to_move=None):
3691 """Run all the postprocessors on the given file."""
3692 info['filepath'] = filename
3693 info['__files_to_move'] = files_to_move or {}
3694 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3695 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3696 del info['__files_to_move']
3697 return self.run_all_pps('after_move', info)
3698
3699 def _make_archive_id(self, info_dict):
3700 video_id = info_dict.get('id')
3701 if not video_id:
3702 return
3703 # Future-proof against any change in the case of the extractor key
3704 # and keep backwards compatibility with prior versions
3705 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3706 if extractor is None:
3707 url = str_or_none(info_dict.get('url'))
3708 if not url:
3709 return
3710 # Try to find matching extractor for the URL and take its ie_key
3711 for ie_key, ie in self._ies.items():
3712 if ie.suitable(url):
3713 extractor = ie_key
3714 break
3715 else:
3716 return
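# the resulting archive entry has the form '<lowercased extractor key> <id>',
# e.g. 'youtube dQw4w9WgXcQ' (see make_archive_id in utils)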
3717 return make_archive_id(extractor, video_id)
3718
3719 def in_download_archive(self, info_dict):
3720 if not self.archive:
3721 return False
3722
3723 vid_ids = [self._make_archive_id(info_dict)]
3724 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3725 return any(id_ in self.archive for id_ in vid_ids)
3726
3727 def record_download_archive(self, info_dict):
3728 fn = self.params.get('download_archive')
3729 if fn is None:
3730 return
3731 vid_id = self._make_archive_id(info_dict)
3732 assert vid_id
3733
3734 self.write_debug(f'Adding to archive: {vid_id}')
3735 if is_path_like(fn):
3736 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3737 archive_file.write(vid_id + '\n')
3738 self.archive.add(vid_id)
3739
3740 @staticmethod
3741 def format_resolution(format, default='unknown'):
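# e.g. {'width': 1280, 'height': 720} -> '1280x720'; {'height': 720} -> '720p';
# {'vcodec': 'none', 'acodec': 'mp4a.40.2'} -> 'audio only'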
3742 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3743 return 'audio only'
3744 if format.get('resolution') is not None:
3745 return format['resolution']
3746 if format.get('width') and format.get('height'):
3747 return '%dx%d' % (format['width'], format['height'])
3748 elif format.get('height'):
3749 return '%sp' % format['height']
3750 elif format.get('width'):
3751 return '%dx?' % format['width']
3752 return default
3753
3754 def _list_format_headers(self, *headers):
3755 if self.params.get('listformats_table', True) is not False:
3756 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3757 return headers
3758
3759 def _format_note(self, fdict):
3760 res = ''
3761 if fdict.get('ext') in ['f4f', 'f4m']:
3762 res += '(unsupported)'
3763 if fdict.get('language'):
3764 if res:
3765 res += ' '
3766 res += '[%s]' % fdict['language']
3767 if fdict.get('format_note') is not None:
3768 if res:
3769 res += ' '
3770 res += fdict['format_note']
3771 if fdict.get('tbr') is not None:
3772 if res:
3773 res += ', '
3774 res += '%4dk' % fdict['tbr']
3775 if fdict.get('container') is not None:
3776 if res:
3777 res += ', '
3778 res += '%s container' % fdict['container']
3779 if (fdict.get('vcodec') is not None
3780 and fdict.get('vcodec') != 'none'):
3781 if res:
3782 res += ', '
3783 res += fdict['vcodec']
3784 if fdict.get('vbr') is not None:
3785 res += '@'
3786 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3787 res += 'video@'
3788 if fdict.get('vbr') is not None:
3789 res += '%4dk' % fdict['vbr']
3790 if fdict.get('fps') is not None:
3791 if res:
3792 res += ', '
3793 res += '%sfps' % fdict['fps']
3794 if fdict.get('acodec') is not None:
3795 if res:
3796 res += ', '
3797 if fdict['acodec'] == 'none':
3798 res += 'video only'
3799 else:
3800 res += '%-5s' % fdict['acodec']
3801 elif fdict.get('abr') is not None:
3802 if res:
3803 res += ', '
3804 res += 'audio'
3805 if fdict.get('abr') is not None:
3806 res += '@%3dk' % fdict['abr']
3807 if fdict.get('asr') is not None:
3808 res += ' (%5dHz)' % fdict['asr']
3809 if fdict.get('filesize') is not None:
3810 if res:
3811 res += ', '
3812 res += format_bytes(fdict['filesize'])
3813 elif fdict.get('filesize_approx') is not None:
3814 if res:
3815 res += ', '
3816 res += '~' + format_bytes(fdict['filesize_approx'])
3817 return res
3818
3819 def _get_formats(self, info_dict):
3820 if info_dict.get('formats') is None:
3821 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3822 return [info_dict]
3823 return []
3824 return info_dict['formats']
3825
3826 def render_formats_table(self, info_dict):
3827 formats = self._get_formats(info_dict)
3828 if not formats:
3829 return
3830 if self.params.get('listformats_table', True) is False:
3831 table = [
3832 [
3833 format_field(f, 'format_id'),
3834 format_field(f, 'ext'),
3835 self.format_resolution(f),
3836 self._format_note(f)
3837 ] for f in formats if (f.get('preference') or 0) >= -1000]
3838 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3839
3840 def simplified_codec(f, field):
3841 assert field in ('acodec', 'vcodec')
3842 codec = f.get(field)
3843 if not codec:
3844 return 'unknown'
3845 elif codec != 'none':
3846 return '.'.join(codec.split('.')[:4])
3847
3848 if field == 'vcodec' and f.get('acodec') == 'none':
3849 return 'images'
3850 elif field == 'acodec' and f.get('vcodec') == 'none':
3851 return ''
3852 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3853 self.Styles.SUPPRESS)
3854
3855 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3856 table = [
3857 [
3858 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3859 format_field(f, 'ext'),
3860 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3861 format_field(f, 'fps', '\t%d', func=round),
3862 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3863 format_field(f, 'audio_channels', '\t%s'),
3864 delim, (
3865 format_field(f, 'filesize', ' \t%s', func=format_bytes)
3866 or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
3867 or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
3868 None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
3869 format_field(f, 'tbr', '\t%dk', func=round),
3870 shorten_protocol_name(f.get('protocol', '')),
3871 delim,
3872 simplified_codec(f, 'vcodec'),
3873 format_field(f, 'vbr', '\t%dk', func=round),
3874 simplified_codec(f, 'acodec'),
3875 format_field(f, 'abr', '\t%dk', func=round),
3876 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3877 join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
3878 self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
3879 (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
3880 else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
3881 format_field(f, 'format_note'),
3882 format_field(f, 'container', ignore=(None, f.get('ext'))),
3883 delim=', '), delim=' '),
3884 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3885 header_line = self._list_format_headers(
3886 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3887 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3888
3889 return render_table(
3890 header_line, table, hide_empty=True,
3891 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3892
3893 def render_thumbnails_table(self, info_dict):
3894 thumbnails = list(info_dict.get('thumbnails') or [])
3895 if not thumbnails:
3896 return None
3897 return render_table(
3898 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3899 [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3900
3901 def render_subtitles_table(self, video_id, subtitles):
3902 def _row(lang, formats):
3903 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3904 if len(set(names)) == 1:
3905 names = [] if names[0] == 'unknown' else names[:1]
3906 return [lang, ', '.join(names), ', '.join(exts)]
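# e.g. 'en' with formats [{'ext': 'vtt', 'name': 'English'}, {'ext': 'srt',
# 'name': 'English'}] renders as ['en', 'English', 'srt, vtt']; a name column
# consisting only of 'unknown' entries is dropped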
3907
3908 if not subtitles:
3909 return None
3910 return render_table(
3911 self._list_format_headers('Language', 'Name', 'Formats'),
3912 [_row(lang, formats) for lang, formats in subtitles.items()],
3913 hide_empty=True)
3914
3915 def __list_table(self, video_id, name, func, *args):
3916 table = func(*args)
3917 if not table:
3918 self.to_screen(f'{video_id} has no {name}')
3919 return
3920 self.to_screen(f'[info] Available {name} for {video_id}:')
3921 self.to_stdout(table)
3922
3923 def list_formats(self, info_dict):
3924 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3925
3926 def list_thumbnails(self, info_dict):
3927 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3928
3929 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3930 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3931
3932 def print_debug_header(self):
3933 if not self.params.get('verbose'):
3934 return
3935
3936 from . import _IN_CLI # Must be delayed import
3937
3938 # These imports can be slow, so import them only as needed
3939 from .extractor.extractors import _LAZY_LOADER
3940 from .extractor.extractors import (
3941 _PLUGIN_CLASSES as plugin_ies,
3942 _PLUGIN_OVERRIDES as plugin_ie_overrides
3943 )
3944
3945 def get_encoding(stream):
3946 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3947 additional_info = []
3948 if os.environ.get('TERM', '').lower() == 'dumb':
3949 additional_info.append('dumb')
3950 if not supports_terminal_sequences(stream):
3951 from .utils import WINDOWS_VT_MODE # Must be imported locally
3952 additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
3953 if additional_info:
3954 ret = f'{ret} ({",".join(additional_info)})'
3955 return ret
3956
3957 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
3958 locale.getpreferredencoding(),
3959 sys.getfilesystemencoding(),
3960 self.get_encoding(),
3961 ', '.join(
3962 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3963 if stream is not None and key != 'console')
3964 )
3965
3966 logger = self.params.get('logger')
3967 if logger:
3968 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3969 write_debug(encoding_str)
3970 else:
3971 write_string(f'[debug] {encoding_str}\n', encoding=None)
3972 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3973
3974 source = detect_variant()
3975 if VARIANT not in (None, 'pip'):
3976 source += '*'
3977 klass = type(self)
3978 write_debug(join_nonempty(
3979 f'{REPOSITORY.rpartition("/")[2]} version',
3980 _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
3981 f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
3982 '' if source == 'unknown' else f'({source})',
3983 '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
3984 delim=' '))
3985
3986 if not _IN_CLI:
3987 write_debug(f'params: {self.params}')
3988
3989 if not _LAZY_LOADER:
3990 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3991 write_debug('Lazy loading extractors is forcibly disabled')
3992 else:
3993 write_debug('Lazy loading extractors is disabled')
3994 if self.params['compat_opts']:
3995 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
3996
3997 if current_git_head():
3998 write_debug(f'Git HEAD: {current_git_head()}')
3999 write_debug(system_identifier())
4000
4001 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
4002 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
4003 if ffmpeg_features:
4004 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
4005
4006 exe_versions['rtmpdump'] = rtmpdump_version()
4007 exe_versions['phantomjs'] = PhantomJSwrapper._version()
4008 exe_str = ', '.join(
4009 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
4010 ) or 'none'
4011 write_debug('exe versions: %s' % exe_str)
4012
4013 from .compat.compat_utils import get_package_info
4014 from .dependencies import available_dependencies
4015
4016 write_debug('Optional libraries: %s' % (', '.join(sorted({
4017 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
4018 })) or 'none'))
4019
4020 write_debug(f'Proxy map: {self.proxies}')
4021 write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
4022 for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
4023 display_list = ['%s%s' % (
4024 klass.__name__, '' if klass.__name__ == name else f' as {name}')
4025 for name, klass in plugins.items()]
4026 if plugin_type == 'Extractor':
4027 display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
4028 for parent, plugins in plugin_ie_overrides.items())
4029 if not display_list:
4030 continue
4031 write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
4032
4033 plugin_dirs = plugin_directories()
4034 if plugin_dirs:
4035 write_debug(f'Plugin directories: {plugin_dirs}')
4036
4037 # Not implemented
4038 if False and self.params.get('call_home'):
4039 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
4040 write_debug('Public IP address: %s' % ipaddr)
4041 latest_version = self.urlopen(
4042 'https://yt-dl.org/latest/version').read().decode()
4043 if version_tuple(latest_version) > version_tuple(__version__):
4044 self.report_warning(
4045 'You are using an outdated version (newest version: %s)! '
4046 'See https://yt-dl.org/update if you need help updating.' %
4047 latest_version)
4048
4049 @functools.cached_property
4050 def proxies(self):
4051 """Global proxy configuration"""
4052 opts_proxy = self.params.get('proxy')
4053 if opts_proxy is not None:
4054 if opts_proxy == '':
4055 opts_proxy = '__noproxy__'
4056 proxies = {'all': opts_proxy}
4057 else:
4058 proxies = urllib.request.getproxies()
4059 # compat: mirror 'http' to 'https'; set HTTPS_PROXY to __noproxy__ to revert
4060 if 'http' in proxies and 'https' not in proxies:
4061 proxies['https'] = proxies['http']
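# e.g. --proxy '' yields {'all': '__noproxy__'} (force direct connections),
# while without --proxy the environment's proxy settings are picked up here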
4062
4063 return proxies
4064
4065 @functools.cached_property
4066 def cookiejar(self):
4067 """Global cookiejar instance"""
4068 return load_cookies(
4069 self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
4070
4071 @property
4072 def _opener(self):
4073 """
4074 Get a urllib OpenerDirector from the Urllib handler (deprecated).
4075 """
4076 self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
4077 handler = self._request_director.handlers['Urllib']
4078 return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
4079
4080 def urlopen(self, req):
4081 """ Start an HTTP download """
4082 if isinstance(req, str):
4083 req = Request(req)
4084 elif isinstance(req, urllib.request.Request):
4085 self.deprecation_warning(
4086 'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
4087 'Use yt_dlp.networking.common.Request instead.')
4088 req = urllib_req_to_req(req)
4089 assert isinstance(req, Request)
4090
4091 # compat: Assume user:pass in the URL is basic auth
4092 url, basic_auth_header = extract_basic_auth(req.url)
4093 if basic_auth_header:
4094 req.headers['Authorization'] = basic_auth_header
4095 req.url = sanitize_url(url)
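# e.g. 'https://user:pass@example.com/x' is rewritten to
# 'https://example.com/x' plus an 'Authorization: Basic ...' header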
4096
4097 clean_proxies(proxies=req.proxies, headers=req.headers)
4098 clean_headers(req.headers)
4099
4100 try:
4101 return self._request_director.send(req)
4102 except NoSupportingHandlers as e:
4103 for ue in e.unsupported_errors:
4104 # FIXME: This depends on the order of errors.
4105 if not (ue.handler and ue.msg):
4106 continue
4107 if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
4108 raise RequestError(
4109 'file:// URLs are disabled by default in yt-dlp for security reasons. '
4110 'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
4111 if 'unsupported proxy type: "https"' in ue.msg.lower():
4112 raise RequestError(
4113 'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests')
4114
4115 elif (
4116 re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
4117 and 'websockets' not in self._request_director.handlers
4118 ):
4119 raise RequestError(
4120 'This request requires WebSocket support. '
4121 'Ensure one of the following dependencies are installed: websockets',
4122 cause=ue) from ue
4123 raise
4124 except SSLError as e:
4125 if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
4126 raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
4127 elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
4128 raise RequestError(
4129 'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
4130 'Try using --legacy-server-connect', cause=e) from e
4131 raise
4132
4133 def build_request_director(self, handlers, preferences=None):
4134 logger = _YDLLogger(self)
4135 headers = self.params['http_headers'].copy()
4136 proxies = self.proxies.copy()
4137 clean_headers(headers)
4138 clean_proxies(proxies, headers)
4139
4140 director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
4141 for handler in handlers:
4142 director.add_handler(handler(
4143 logger=logger,
4144 headers=headers,
4145 cookiejar=self.cookiejar,
4146 proxies=proxies,
4147 prefer_system_certs='no-certifi' in self.params['compat_opts'],
4148 verify=not self.params.get('nocheckcertificate'),
4149 **traverse_obj(self.params, {
4150 'verbose': 'debug_printtraffic',
4151 'source_address': 'source_address',
4152 'timeout': 'socket_timeout',
4153 'legacy_ssl_support': 'legacyserverconnect',
4154 'enable_file_urls': 'enable_file_urls',
4155 'client_cert': {
4156 'client_certificate': 'client_certificate',
4157 'client_certificate_key': 'client_certificate_key',
4158 'client_certificate_password': 'client_certificate_password',
4159 },
4160 }),
4161 ))
4162 director.preferences.update(preferences or [])
4163 if 'prefer-legacy-http-handler' in self.params['compat_opts']:
4164 director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
4165 return director
4166
4167 @functools.cached_property
4168 def _request_director(self):
4169 return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
4170
4171 def encode(self, s):
4172 if isinstance(s, bytes):
4173 return s # Already encoded
4174
4175 try:
4176 return s.encode(self.get_encoding())
4177 except UnicodeEncodeError as err:
4178 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
4179 raise
4180
4181 def get_encoding(self):
4182 encoding = self.params.get('encoding')
4183 if encoding is None:
4184 encoding = preferredencoding()
4185 return encoding
4186
4187 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
4188 ''' Write infojson and return True = written, 'exists' = Already exists, False = skip, None = error '''
4189 if overwrite is None:
4190 overwrite = self.params.get('overwrites', True)
4191 if not self.params.get('writeinfojson'):
4192 return False
4193 elif not infofn:
4194 self.write_debug(f'Skipping writing {label} infojson')
4195 return False
4196 elif not self._ensure_dir_exists(infofn):
4197 return None
4198 elif not overwrite and os.path.exists(infofn):
4199 self.to_screen(f'[info] {label.title()} metadata is already present')
4200 return 'exists'
4201
4202 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
4203 try:
4204 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
4205 return True
4206 except OSError:
4207 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
4208 return None
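
# Editor's sketch of the four-way return contract (filename illustrative):
#
#   status = self._write_info_json('video', ie_result, 'test.info.json')
#   if status is None:
#       ...  # write failed; the error has already been reported
#   elif status:  # True or 'exists' -- the JSON is on disk either way
#       ...  # safe to read test.info.json
#   else:  # False -- writing was skipped (e.g. no 'writeinfojson')
#       ...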
4209
4210 def _write_description(self, label, ie_result, descfn):
4211 ''' Write description and return True = written, False = skipped, None = error '''
4212 if not self.params.get('writedescription'):
4213 return False
4214 elif not descfn:
4215 self.write_debug(f'Skipping writing {label} description')
4216 return False
4217 elif not self._ensure_dir_exists(descfn):
4218 return None
4219 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
4220 self.to_screen(f'[info] {label.title()} description is already present')
4221 elif ie_result.get('description') is None:
4222 self.to_screen(f'[info] There\'s no {label} description to write')
4223 return False
4224 else:
4225 try:
4226 self.to_screen(f'[info] Writing {label} description to: {descfn}')
4227 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
4228 descfile.write(ie_result['description'])
4229 except OSError:
4230 self.report_error(f'Cannot write {label} description file {descfn}')
4231 return None
4232 return True
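
# Editor's note: unlike _write_info_json, an already-present description is not
# reported as a distinct value -- that branch falls through to the `return True`
# above, so callers cannot distinguish "written" from "already there".
# Sketch (params and filename illustrative):
#
#   ydl = YoutubeDL({'writedescription': True, 'overwrites': False})
#   ydl._write_description('video', {'description': 'text'}, 'test.description')
#   # -> True on the first call, and True again while the file exists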
4233
4234 def _write_subtitles(self, info_dict, filename):
4235 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error '''
4236 ret = []
4237 subtitles = info_dict.get('requested_subtitles')
4238 if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
4239 # Subtitle download errors are already reported within the relevant IE,
4240 # so processing silently continues when an IE lacks subtitle support
4241 return ret
4242 elif not subtitles:
4243 self.to_screen('[info] There are no subtitles for the requested languages')
4244 return ret
4245 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
4246 if not sub_filename_base:
4247 self.to_screen('[info] Skipping writing video subtitles')
4248 return ret
4249
4250 for sub_lang, sub_info in subtitles.items():
4251 sub_format = sub_info['ext']
4252 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
4253 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
4254 existing_sub = self.existing_file((sub_filename_final, sub_filename))
4255 if existing_sub:
4256 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
4257 sub_info['filepath'] = existing_sub
4258 ret.append((existing_sub, sub_filename_final))
4259 continue
4260
4261 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
4262 if sub_info.get('data') is not None:
4263 try:
4264 # Use newline='' to prevent conversion of newline characters
4265 # See https://github.com/ytdl-org/youtube-dl/issues/10268
4266 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
4267 subfile.write(sub_info['data'])
4268 sub_info['filepath'] = sub_filename
4269 ret.append((sub_filename, sub_filename_final))
4270 continue
4271 except OSError:
4272 self.report_error(f'Cannot write video subtitles file {sub_filename}')
4273 return None
4274
4275 try:
4276 sub_copy = sub_info.copy()
4277 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
4278 self.dl(sub_filename, sub_copy, subtitle=True)
4279 sub_info['filepath'] = sub_filename
4280 ret.append((sub_filename, sub_filename_final))
4281 except (DownloadError, ExtractorError, OSError, ValueError, *network_exceptions) as err:
4282 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
4283 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
4284 if not self.params.get('ignoreerrors'):
4285 self.report_error(msg)
4286 raise DownloadError(msg)
4287 self.report_warning(msg)
4288 return ret
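
# Editor's sketch: consuming the (current, final) filename pairs; elsewhere in
# this file they feed the files_to_move bookkeeping that
# MoveFilesAfterDownloadPP acts on. (variables illustrative)
#
#   pairs = self._write_subtitles(info_dict, filename)
#   if pairs is None:
#       ...  # a subtitle file could not be written; already reported
#   for sub_filename, sub_filename_final in pairs or []:
#       ...  # current location vs. where the file should finally end up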
4289
4290 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
4291 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error '''
4292 write_all = self.params.get('write_all_thumbnails', False)
4293 thumbnails, ret = [], []
4294 if write_all or self.params.get('writethumbnail', False):
4295 thumbnails = info_dict.get('thumbnails') or []
4296 if not thumbnails:
4297 self.to_screen(f'[info] There are no {label} thumbnails to download')
4298 return ret
4299 multiple = write_all and len(thumbnails) > 1
4300
4301 if thumb_filename_base is None:
4302 thumb_filename_base = filename
4303 if thumbnails and not thumb_filename_base:
4304 self.write_debug(f'Skipping writing {label} thumbnail')
4305 return ret
4306
4307 if thumbnails and not self._ensure_dir_exists(filename):
4308 return None
4309
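# Thumbnails are sorted worst to best, so iterate in reverse to try the best
# first (reverse order also keeps pop(idx) below from shifting pending indices)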
4310 for idx, t in list(enumerate(thumbnails))[::-1]:
4311 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
4312 thumb_display_id = f'{label} thumbnail {t["id"]}'
4313 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4314 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
4315
4316 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4317 if existing_thumb:
4318 self.to_screen('[info] %s is already present' % (
4319 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
4320 t['filepath'] = existing_thumb
4321 ret.append((existing_thumb, thumb_filename_final))
4322 else:
4323 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
4324 try:
4325 uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
4326 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
4327 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
4328 shutil.copyfileobj(uf, thumbf)
4329 ret.append((thumb_filename, thumb_filename_final))
4330 t['filepath'] = thumb_filename
4331 except network_exceptions as err:
4332 if isinstance(err, HTTPError) and err.status == 404:
4333 self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
4334 else:
4335 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
4336 thumbnails.pop(idx)
4337 if ret and not write_all:
4338 break
4339 return ret
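
# Editor's sketch of the two write modes (params illustrative):
#
#   ydl = YoutubeDL({'writethumbnail': True})
#   ydl._write_thumbnails('video', info_dict, filename)
#   # -> at most one (thumb_filename, thumb_filename_final) pair: the loop
#   #    stops after the first successful download
#   ydl = YoutubeDL({'write_all_thumbnails': True})
#   # -> one pair per thumbnail that downloads successfully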