yt_dlp/YoutubeDL.py

   1 import collections
   2 import contextlib
   3 import copy
   4 import datetime
   5 import errno
   6 import fileinput
   7 import http.cookiejar
   8 import io
   9 import itertools
  10 import json
  11 import locale
  12 import operator
  13 import os
  14 import random
  15 import re
  16 import shutil
  17 import string
  18 import subprocess
  19 import sys
  20 import tempfile
  21 import time
  22 import tokenize
  23 import traceback
  24 import unicodedata
  25
  26 from .cache import Cache
  27 from .compat import functools, urllib  # isort: split
  28 from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
  29 from .cookies import LenientSimpleCookie, load_cookies
  30 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
  31 from .downloader.rtmp import rtmpdump_version
  32 from .extractor import gen_extractor_classes, get_info_extractor
  33 from .extractor.common import UnsupportedURLIE
  34 from .extractor.openload import PhantomJSwrapper
  35 from .minicurses import format_text
  36 from .networking import HEADRequest, Request, RequestDirector
  37 from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
  38 from .networking.exceptions import (
  39     HTTPError,
  40     NoSupportingHandlers,
  41     RequestError,
  42     SSLError,
  43     _CompatHTTPError,
  44     network_exceptions,
  45 )
  46 from .plugins import directories as plugin_directories
  47 from .postprocessor import _PLUGIN_CLASSES as plugin_pps
  48 from .postprocessor import (
  49     EmbedThumbnailPP,
  50     FFmpegFixupDuplicateMoovPP,
  51     FFmpegFixupDurationPP,
  52     FFmpegFixupM3u8PP,
  53     FFmpegFixupM4aPP,
  54     FFmpegFixupStretchedPP,
  55     FFmpegFixupTimestampPP,
  56     FFmpegMergerPP,
  57     FFmpegPostProcessor,
  58     FFmpegVideoConvertorPP,
  59     MoveFilesAfterDownloadPP,
  60     get_postprocessor,
  61 )
  62 from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
  63 from .update import REPOSITORY, _get_system_deprecation, _make_label, current_git_head, detect_variant
  64 from .utils import (
  65     DEFAULT_OUTTMPL,
  66     IDENTITY,
  67     LINK_TEMPLATES,
  68     MEDIA_EXTENSIONS,
  69     NO_DEFAULT,
  70     NUMBER_RE,
  71     OUTTMPL_TYPES,
  72     POSTPROCESS_WHEN,
  73     STR_FORMAT_RE_TMPL,
  74     STR_FORMAT_TYPES,
  75     ContentTooShortError,
  76     DateRange,
  77     DownloadCancelled,
  78     DownloadError,
  79     EntryNotInPlaylist,
  80     ExistingVideoReached,
  81     ExtractorError,
  82     FormatSorter,
  83     GeoRestrictedError,
  84     ISO3166Utils,
  85     LazyList,
  86     MaxDownloadsReached,
  87     Namespace,
  88     PagedList,
  89     PlaylistEntries,
  90     Popen,
  91     PostProcessingError,
  92     ReExtractInfo,
  93     RejectedVideoReached,
  94     SameFileError,
  95     UnavailableVideoError,
  96     UserNotLive,
  97     age_restricted,
  98     args_to_str,
  99     bug_reports_message,
 100     date_from_str,
 101     deprecation_warning,
 102     determine_ext,
 103     determine_protocol,
 104     encode_compat_str,
 105     encodeFilename,
 106     error_to_compat_str,
 107     escapeHTML,
 108     expand_path,
 109     extract_basic_auth,
 110     filter_dict,
 111     float_or_none,
 112     format_bytes,
 113     format_decimal_suffix,
 114     format_field,
 115     formatSeconds,
 116     get_compatible_ext,
 117     get_domain,
 118     int_or_none,
 119     iri_to_uri,
 120     is_path_like,
 121     join_nonempty,
 122     locked_file,
 123     make_archive_id,
 124     make_dir,
 125     number_of_digits,
 126     orderedSet,
 127     orderedSet_from_options,
 128     parse_filesize,
 129     preferredencoding,
 130     prepend_extension,
 131     remove_terminal_sequences,
 132     render_table,
 133     replace_extension,
 134     sanitize_filename,
 135     sanitize_path,
 136     sanitize_url,
 137     str_or_none,
 138     strftime_or_none,
 139     subtitles_filename,
 140     supports_terminal_sequences,
 141     system_identifier,
 142     timetuple_from_msec,
 143     to_high_limit_path,
 144     traverse_obj,
 145     try_call,
 146     try_get,
 147     url_basename,
 148     variadic,
 149     version_tuple,
 150     windows_enable_vt_mode,
 151     write_json_file,
 152     write_string,
 153 )
 154 from .utils._utils import _YDLLogger
 155 from .utils.networking import (
 156     HTTPHeaderDict,
 157     clean_headers,
 158     clean_proxies,
 159     std_headers,
 160 )
 161 from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__
 162
 163 if compat_os_name == 'nt':
 164     import ctypes
 165
 166
 167 class YoutubeDL:
 168     """YoutubeDL class.
 169
 170     YoutubeDL objects are the ones responsible for downloading the
 171     actual video file and writing it to disk if the user has requested
 172     it, among some other tasks. In most cases there should be one per
 173     program. Given a video URL, the downloader doesn't know how to
 174     extract all the needed information (that is the task of the
 175     InfoExtractors), so it has to pass the URL to one of them.
 176
 177     For this, YoutubeDL objects have a method that allows
 178     InfoExtractors to be registered in a given order. When it is passed
 179     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 180     finds that reports being able to handle it. The InfoExtractor extracts
 181     all the information about the video or videos the URL refers to, and
 182     YoutubeDL processes the extracted information, possibly using a File
 183     Downloader to download the video.
 184
 185     YoutubeDL objects accept a lot of parameters. In order not to saturate
 186     the object constructor with arguments, it receives a dictionary of
 187     options instead. These options are available through the params
 188     attribute for the InfoExtractors to use. The YoutubeDL also
 189     registers itself as the downloader in charge for the InfoExtractors
 190     that are added to it, so this is a "mutual registration".
 191
 192     Available options:
 193
 194     username:          Username for authentication purposes.
 195     password:          Password for authentication purposes.
 196     videopassword:     Password for accessing a video.
 197     ap_mso:            Adobe Pass multiple-system operator identifier.
 198     ap_username:       Multiple-system operator account username.
 199     ap_password:       Multiple-system operator account password.
 200     usenetrc:          Use netrc for authentication instead.
 201     netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
 202     netrc_cmd:         Use a shell command to get credentials
 203     verbose:           Print additional info to stdout.
 204     quiet:             Do not print messages to stdout.
 205     no_warnings:       Do not print out anything for warnings.
 206     forceprint:        A dict with keys WHEN mapped to a list of templates to
 207                        print to stdout. The allowed keys are video or any of the
 208                        items in utils.POSTPROCESS_WHEN.
 209                        For compatibility, a single list is also accepted
 210     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 211                        a list of tuples with (template, filename)
 212     forcejson:         Force printing info_dict as JSON.
 213     dump_single_json:  Force printing the info_dict of the whole playlist
 214                        (or video) as a single JSON line.
 215     force_write_download_archive: Force writing download archive regardless
 216                        of 'skip_download' or 'simulate'.
 217     simulate:          Do not download the video files. If unset (or None),
 218                        simulate only if listsubtitles, listformats or list_thumbnails is used
 219     format:            Video format code. see "FORMAT SELECTION" for more details.
 220                        You can also pass a function. The function takes 'ctx' as
 221                        argument and returns the formats to download.
 222                        See "build_format_selector" for an implementation
 223     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 224     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 225                        extracting metadata even if the video is not actually
 226                        available for download (experimental)
 227     format_sort:       A list of fields by which to sort the video formats.
 228                        See "Sorting Formats" for more details.
 229     format_sort_force: Force the given format_sort. see "Sorting Formats"
 230                        for more details.
 231     prefer_free_formats: Whether to prefer video formats with free containers
 232                        over non-free ones of same quality.
 233     allow_multiple_video_streams:   Allow multiple video streams to be merged
 234                        into a single file
 235     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 236                        into a single file
 237     check_formats:     Whether to test if the formats are downloadable.
 238                        Can be True (check all), False (check none),
 239                        'selected' (check selected formats),
 240                        or None (check only if requested by extractor)
 241     paths:             Dictionary of output paths. The allowed keys are 'home',
 242                        'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
 243     outtmpl:           Dictionary of templates for output names. Allowed keys
 244                        are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
 245                        For compatibility with youtube-dl, a single string can also be used
 246     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 247     restrictfilenames: Do not allow "&" and spaces in file names
 248     trim_file_name:    Limit length of filename (extension excluded)
 249     windowsfilenames:  Force the filenames to be windows compatible
 250     ignoreerrors:      Do not stop on download/postprocessing errors.
 251                        Can be 'only_download' to ignore only download errors.
 252                        Default is 'only_download' for CLI, but False for API
 253     skip_playlist_after_errors: Number of allowed failures until the rest of
 254                        the playlist is skipped
 255     allowed_extractors:  List of regexes to match against extractor names that are allowed
 256     overwrites:        Overwrite all video and metadata files if True,
 257                        overwrite only non-video files if None
 258                        and don't overwrite any file if False
 259     playlist_items:    Specific indices of playlist to download.
 260     playlistrandom:    Download playlist items in random order.
 261     lazy_playlist:     Process playlist entries as they are received.
 262     matchtitle:        Download only matching titles.
 263     rejecttitle:       Reject downloads for matching titles.
 264     logger:            Log messages to a logging.Logger instance.
 265     logtostderr:       Print everything to stderr instead of stdout.
 266     consoletitle:      Display progress in console window's titlebar.
 267     writedescription:  Write the video description to a .description file
 268     writeinfojson:     Write the video metadata to a .info.json file
 269     clean_infojson:    Remove internal metadata from the infojson
 270     getcomments:       Extract video comments. This will not be written to disk
 271                        unless writeinfojson is also given
 272     writeannotations:  Write the video annotations to a .annotations.xml file
 273     writethumbnail:    Write the thumbnail image to a file
 274     allow_playlist_files: Whether to write playlists' description, infojson etc
 275                        also to disk when using the 'write*' options
 276     write_all_thumbnails:  Write all thumbnail formats to files
 277     writelink:         Write an internet shortcut file, depending on the
 278                        current platform (.url/.webloc/.desktop)
 279     writeurllink:      Write a Windows internet shortcut file (.url)
 280     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 281     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 282     writesubtitles:    Write the video subtitles to a file
 283     writeautomaticsub: Write the automatically generated subtitles to a file
 284     listsubtitles:     Lists all available subtitles for the video
 285     subtitlesformat:   The format code for subtitles
 286     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 287                        The list may contain "all" to refer to all the available
 288                        subtitles. The language can be prefixed with a "-" to
 289                        exclude it from the requested languages, e.g. ['all', '-live_chat']
 290     keepvideo:         Keep the video file after post-processing
 291     daterange:         A utils.DateRange object, download only if the upload_date is in the range.
 292     skip_download:     Skip the actual download of the video file
 293     cachedir:          Location of the cache files in the filesystem.
 294                        False to disable filesystem cache.
 295     noplaylist:        Download single video instead of a playlist if in doubt.
 296     age_limit:         An integer representing the user's age in years.
 297                        Unsuitable videos for the given age are skipped.
 298     min_views:         An integer representing the minimum view count the video
 299                        must have in order to not be skipped.
 300                        Videos without view count information are always
 301                        downloaded. None for no limit.
 302     max_views:         An integer representing the maximum view count.
 303                        Videos that are more popular than that are not
 304                        downloaded.
 305                        Videos without view count information are always
 306                        downloaded. None for no limit.
 307     download_archive:  A set, or the name of a file where all downloads are recorded.
 308                        Videos already present in the file are not downloaded again.
 309     break_on_existing: Stop the download process after attempting to download a
 310                        file that is in the archive.
 311     break_per_url:     Whether break_on_reject and break_on_existing
 312                        should act on each input URL as opposed to for the entire queue
 313     cookiefile:        File name or text stream from where cookies should be read and dumped to
 314     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 315                        name/path from where cookies are loaded, the name of the keyring,
 316                        and the container name, e.g. ('chrome', ) or
 317                        ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
 318     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 319                        support RFC 5746 secure renegotiation
 320     nocheckcertificate:  Do not verify SSL certificates
 321     client_certificate:  Path to client certificate file in PEM format. May include the private key
 322     client_certificate_key:  Path to private key file for client certificate
 323     client_certificate_password:  Password for client certificate private key, if encrypted.
 324                         If not provided and the key is encrypted, yt-dlp will ask interactively
 325     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 326                        (Only supported by some extractors)
 327     enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
 328     http_headers:      A dictionary of custom headers to be used for all requests
 329     proxy:             URL of the proxy server to use
 330     geo_verification_proxy:  URL of the proxy to use for IP address verification
 331                        on geo-restricted sites.
 332     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 333     bidi_workaround:   Work around buggy terminals without bidirectional text
 334                        support, using fribidi
 335     debug_printtraffic:Print out sent and received HTTP traffic
 336     default_search:    Prepend this string if an input url is not valid.
 337                        'auto' for elaborate guessing
 338     encoding:          Use this encoding instead of the system-specified.
 339     extract_flat:      Whether to resolve and process url_results further
 340                        * False:     Always process. Default for API
 341                        * True:      Never process
 342                        * 'in_playlist': Do not process inside playlist/multi_video
 343                        * 'discard': Always process, but don't return the result
 344                                     from inside playlist/multi_video
 345                        * 'discard_in_playlist': Same as "discard", but only for
 346                                     playlists (not multi_video). Default for CLI
 347     wait_for_video:    If given, wait for scheduled streams to become available.
 348                        The value should be a tuple containing the range
 349                        (min_secs, max_secs) to wait between retries
 350     postprocessors:    A list of dictionaries, each with an entry
 351                        * key:  The name of the postprocessor. See
 352                                yt_dlp/postprocessor/__init__.py for a list.
 353                        * when: When to run the postprocessor. Allowed values are
 354                                the entries of utils.POSTPROCESS_WHEN
 355                                Assumed to be 'post_process' if not given
 356     progress_hooks:    A list of functions that get called on download
 357                        progress, with a dictionary with the entries
 358                        * status: One of "downloading", "error", or "finished".
 359                                  Check this first and ignore unknown values.
 360                        * info_dict: The extracted info_dict
 361
 362                        If status is one of "downloading", or "finished", the
 363                        following properties may also be present:
 364                        * filename: The final filename (always present)
 365                        * tmpfilename: The filename we're currently writing to
 366                        * downloaded_bytes: Bytes on disk
 367                        * total_bytes: Size of the whole file, None if unknown
 368                        * total_bytes_estimate: Guess of the eventual file size,
 369                                                None if unavailable.
 370                        * elapsed: The number of seconds since download started.
 371                        * eta: The estimated time in seconds, None if unknown
 372                        * speed: The download speed in bytes/second, None if
 373                                 unknown
 374                        * fragment_index: The counter of the currently
 375                                          downloaded video fragment.
 376                        * fragment_count: The number of fragments (= individual
 377                                          files that will be merged)
 378
 379                        Progress hooks are guaranteed to be called at least once
 380                        (with status "finished") if the download is successful.
 381     postprocessor_hooks:  A list of functions that get called on postprocessing
 382                        progress, with a dictionary with the entries
 383                        * status: One of "started", "processing", or "finished".
 384                                  Check this first and ignore unknown values.
 385                        * postprocessor: Name of the postprocessor
 386                        * info_dict: The extracted info_dict
 387
 388                        Postprocessor hooks are guaranteed to be called at least twice
 389                        (with status "started" and "finished") if the processing is successful.
 390     merge_output_format: "/" separated list of extensions to use when merging formats.
 391     final_ext:         Expected final extension; used to detect when the file was
 392                        already downloaded and converted
 393     fixup:             Automatically correct known faults of the file.
 394                        One of:
 395                        - "never": do nothing
 396                        - "warn": only emit a warning
 397                        - "detect_or_warn": check whether we can do anything
 398                                            about it, warn otherwise (default)
 399     source_address:    Client-side IP address to bind to.
 400     sleep_interval_requests: Number of seconds to sleep between requests
 401                        during extraction
 402     sleep_interval:    Number of seconds to sleep before each download when
 403                        used alone or a lower bound of a range for randomized
 404                        sleep before each download (minimum possible number
 405                        of seconds to sleep) when used along with
 406                        max_sleep_interval.
 407     max_sleep_interval:Upper bound of a range for randomized sleep before each
 408                        download (maximum possible number of seconds to sleep).
 409                        Must only be used along with sleep_interval.
 410                        Actual sleep time will be a random float from range
 411                        [sleep_interval; max_sleep_interval].
 412     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 413     listformats:       Print an overview of available video formats and exit.
 414     list_thumbnails:   Print a table of all thumbnails and exit.
 415     match_filter:      A function that gets called for every video with the signature
 416                        (info_dict, *, incomplete: bool) -> Optional[str]
 417                        For backward compatibility with youtube-dl, the signature
 418                        (info_dict) -> Optional[str] is also allowed.
 419                        - If it returns a message, the video is ignored.
 420                        - If it returns None, the video is downloaded.
 421                        - If it returns utils.NO_DEFAULT, the user is interactively
 422                          asked whether to download the video.
 423                        - Raise utils.DownloadCancelled(msg) to abort remaining
 424                          downloads when a video is rejected.
 425                        match_filter_func in utils/_utils.py is one example for this.
 426     color:             A Dictionary with output stream names as keys
 427                        and their respective color policy as values.
 428                        Can also just be a single color policy,
 429                        in which case it applies to all outputs.
 430                        Valid stream names are 'stdout' and 'stderr'.
 431                        Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
 432     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 433                        HTTP header
 434     geo_bypass_country:
 435                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 436                        explicit geographic restriction bypassing via faking
 437                        X-Forwarded-For HTTP header
 438     geo_bypass_ip_block:
 439                        IP range in CIDR notation that will be used similarly to
 440                        geo_bypass_country
 441     external_downloader: A dictionary of protocol keys and the executable of the
 442                        external downloader to use for it. The allowed protocols
 443                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 444                        Set the value to 'native' to use the native downloader
 445     compat_opts:       Compatibility options. See "Differences in default behavior".
 446                        The following options do not work when used through the API:
 447                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 448                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 449                        Refer to __init__.py for their implementation
 450     progress_template: Dictionary of templates for progress outputs.
 451                        Allowed keys are 'download', 'postprocess',
 452                        'download-title' (console title) and 'postprocess-title'.
 453                        The template is mapped on a dictionary with keys 'progress' and 'info'
 454     retry_sleep_functions: Dictionary of functions that takes the number of attempts
 455                        as argument and returns the time to sleep in seconds.
 456                        Allowed keys are 'http', 'fragment', 'file_access'
 457     download_ranges:   A callback function that gets called for every video with
 458                        the signature (info_dict, ydl) -> Iterable[Section].
 459                        Only the returned sections will be downloaded.
 460                        Each Section is a dict with the following keys:
 461                        * start_time: Start time of the section in seconds
 462                        * end_time: End time of the section in seconds
 463                        * title: Section title (Optional)
 464                        * index: Section number (Optional)
 465     force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
 466     noprogress:        Do not print the progress bar
 467     live_from_start:   Whether to download livestream videos from the start
 468
 469     The following parameters are not used by YoutubeDL itself, they are used by
 470     the downloader (see yt_dlp/downloader/common.py):
 471     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 472     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 473     continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 474     external_downloader_args, concurrent_fragment_downloads.
 475
 476     The following options are used by the post processors:
 477     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 478                        to the binary or its containing directory.
 479     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 480                        and a list of additional command-line arguments for the
 481                        postprocessor/executable. The dict can also have "PP+EXE" keys
 482                        which are used when the given exe is used by the given PP.
 483                        Use 'default' as the name for arguments to be passed to all PPs
 484                        For compatibility with youtube-dl, a single list of args
 485                        can also be used
 486
 487     The following options are used by the extractors:
 488     extractor_retries: Number of times to retry for known errors (default: 3)
 489     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 490     hls_split_discontinuity: Split HLS playlists to different formats at
 491                        discontinuities such as ad breaks (default: False)
 492     extractor_args:    A dictionary of arguments to be passed to the extractors.
 493                        See "EXTRACTOR ARGUMENTS" for details.
 494                        E.g. {'youtube': {'skip': ['dash', 'hls']}}
 495     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 496
 497     The following options are deprecated and may be removed in the future:
 498
 499     break_on_reject:   Stop the download process when encountering a video that
 500                        has been filtered out.
 501                        - `raise DownloadCancelled(msg)` in match_filter instead
 502     force_generic_extractor: Force downloader to use the generic extractor
 503                        - Use allowed_extractors = ['generic', 'default']
 504     playliststart:     - Use playlist_items
 505                        Playlist item to start at.
 506     playlistend:       - Use playlist_items
 507                        Playlist item to end at.
 508     playlistreverse:   - Use playlist_items
 509                        Download playlist items in reverse order.
 510     forceurl:          - Use forceprint
 511                        Force printing final URL.
 512     forcetitle:        - Use forceprint
 513                        Force printing title.
 514     forceid:           - Use forceprint
 515                        Force printing ID.
 516     forcethumbnail:    - Use forceprint
 517                        Force printing thumbnail URL.
 518     forcedescription:  - Use forceprint
 519                        Force printing description.
 520     forcefilename:     - Use forceprint
 521                        Force printing final filename.
 522     forceduration:     - Use forceprint
 523                        Force printing duration.
 524     allsubtitles:      - Use subtitleslangs = ['all']
 525                        Downloads all the subtitles of the video
 526                        (requires writesubtitles or writeautomaticsub)
 527     include_ads:       - Doesn't work
 528                        Download ads as well
 529     call_home:         - Not implemented
 530                        Boolean, true iff we are allowed to contact the
 531                        yt-dlp servers for debugging.
 532     post_hooks:        - Register a custom postprocessor
 533                        A list of functions that get called as the final step
 534                        for each video file, after all postprocessors have been
 535                        called. The filename will be passed as the only argument.
 536     hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
 537                        Use the native HLS downloader instead of ffmpeg/avconv
 538                        if True, otherwise use ffmpeg/avconv if False, otherwise
 539                        use downloader suggested by extractor if None.
 540     prefer_ffmpeg:     - avconv support is deprecated
 541                        If False, use avconv instead of ffmpeg if both are available,
 542                        otherwise prefer ffmpeg.
 543     youtube_include_dash_manifest: - Use extractor_args
 544                        If True (default), DASH manifests and related
 545                        data will be downloaded and processed by extractor.
 546                        You can reduce network I/O by disabling it if you don't
 547                        care about DASH. (only for youtube)
 548     youtube_include_hls_manifest: - Use extractor_args
 549                        If True (default), HLS manifests and related
 550                        data will be downloaded and processed by extractor.
 551                        You can reduce network I/O by disabling it if you don't
 552                        care about HLS. (only for youtube)
 553     no_color:          Same as `color='no_color'`
 554     no_overwrites:     Same as `overwrites=False`
 555     """
 556
    # Names of info-dict fields listed as numeric.
    # NOTE(review): the consumer of this set is not visible in this section;
    # presumably it is used to treat these fields as numbers - confirm
    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    # Names of the fields that belong to a single format
    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    # File extensions grouped by media kind, taken from MEDIA_EXTENSIONS.
    # '3gp' is added on top of the common video extensions
    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }
 583
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        Besides storing the options, this sets up the output streams and the
        color policy, reports deprecated/compat options, normalizes some legacy
        options, registers the hooks and postprocessors given in the options
        and preloads the download archive.

        @param params       Dictionary of options. An empty dict is used if None.
                            NB: The dict is stored as-is and is modified in-place
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}  # ie_key -> extractor class or instance (see add_info_extractor)
        self._ies_instances = {}  # ie_key -> extractor instance
        self._pps = {k: [] for k in POSTPROCESS_WHEN}  # One postprocessor chain per stage
        self._printed_messages = set()  # Messages already written with only_once=True
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)
        self.__header_cookies = []

        # With "logtostderr", the regular output is also sent to stderr
        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            # "quiet" moves the screen messages to stderr
            screen=sys.stderr if self.params.get('quiet') else stdout,
            # No console on Windows ('nt'). Otherwise, the first of stderr/stdout
            # for which supports_terminal_sequences is true, or None
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )

        # Failure to enable VT mode is only logged, never fatal
        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        # The legacy "no_color" option takes precedence over "color".
        # The warning is queued in "_warnings" and reported further below
        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.params.setdefault('_warnings', []).append(
                    'Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        # Environment hints used by the "auto" color policy
        term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
        no_color = bool(os.getenv('NO_COLOR'))

        def process_color_policy(stream):
            """Resolve the color policy of a stream to True, False or 'no_color'"""
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            # "color" may be a per-stream mapping or a single string for all streams
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
            if policy in ('auto', None):
                if term_allow_color and supports_terminal_sequences(stream):
                    return 'no_color' if no_color else True
                return False
            assert policy in ('always', 'never', 'no_color'), policy
            # 'no_color' is passed through unchanged
            return {'always': True, 'never': False}.get(policy, policy)

        # One resolved policy per output file, except the console
        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'
        })

        system_deprecation = _get_system_deprecation()
        if system_deprecation:
            # Indent the continuation lines of a multi-line message
            self.deprecated_feature(system_deprecation.replace('\n', '\n                    '))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                '         If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        # Bidi workaround: messages are piped through an external process (bidiv,
        # or fribidi as fallback) whose output is attached to the slave end of a
        # pty and read back from the master end (see _bidi_workaround)
        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                # A missing executable only disables the workaround; anything else is re-raised
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
        # A "Cookie" header is handed to _load_cookies and then removed from the headers
        self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
        self.params['http_headers'].pop('Cookie', None)
        self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)

        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        def check_deprecated(param, option, suggestion):
            """Warn if the deprecated param is set. Returns whether it was set"""
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        # The old proxy option is only used if the new one is not given
        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        # Report the messages queued in the params (including the one queued above)
        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            # An explicit None is treated the same as the key being absent
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        # Any of the list options implies simulation unless "simulate" is given explicitly
        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        # Must come after "restrictfilenames" is finalized since _parse_outtmpl reads it
        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        # None and '-' are kept as-is, a callable is used directly
        # and anything else is passed to build_format_selector
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        # Register the hooks given in the params through the corresponding add_* method
        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        # Each definition is copied before popping "when" and "key",
        # so the dicts given in the params are left untouched.
        # The remaining items are passed to the postprocessor as keyword arguments
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                # Not a path: the given object is used as the archive unchanged
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                # A non-existent archive file simply gives an empty archive
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))
 788
 789     def warn_if_short_id(self, argv):
 790         # short YouTube ID starting with dash?
 791         idxs = [
 792             i for i, a in enumerate(argv)
 793             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 794         if idxs:
 795             correct_argv = (
 796                 ['yt-dlp']
 797                 + [a for i, a in enumerate(argv) if i not in idxs]
 798                 + ['--'] + [argv[i] for i in idxs]
 799             )
 800             self.report_warning(
 801                 'Long argument string detected. '
 802                 'Use -- to separate parameters and URLs, like this:\n%s' %
 803                 args_to_str(correct_argv))
 804
 805     def add_info_extractor(self, ie):
 806         """Add an InfoExtractor object to the end of the list."""
 807         ie_key = ie.ie_key()
 808         self._ies[ie_key] = ie
 809         if not isinstance(ie, type):
 810             self._ies_instances[ie_key] = ie
 811             ie.set_downloader(self)
 812
 813     def get_info_extractor(self, ie_key):
 814         """
 815         Get an instance of an IE with name ie_key, it will try to get one from
 816         the _ies list, if there's no instance it will create a new one and add
 817         it to the extractor list.
 818         """
 819         ie = self._ies_instances.get(ie_key)
 820         if ie is None:
 821             ie = get_info_extractor(ie_key)()
 822             self.add_info_extractor(ie)
 823         return ie
 824
 825     def add_default_info_extractors(self):
 826         """
 827         Add the InfoExtractors returned by gen_extractors to the end of the list
 828         """
 829         all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
 830         all_ies['end'] = UnsupportedURLIE()
 831         try:
 832             ie_names = orderedSet_from_options(
 833                 self.params.get('allowed_extractors', ['default']), {
 834                     'all': list(all_ies),
 835                     'default': [name for name, ie in all_ies.items() if ie._ENABLED],
 836                 }, use_regex=True)
 837         except re.error as e:
 838             raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
 839         for name in ie_names:
 840             self.add_info_extractor(all_ies[name])
 841         self.write_debug(f'Loaded {len(ie_names)} extractors')
 842
 843     def add_post_processor(self, pp, when='post_process'):
 844         """Add a PostProcessor object to the end of the chain."""
 845         assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
 846         self._pps[when].append(pp)
 847         pp.set_downloader(self)
 848
 849     def add_post_hook(self, ph):
 850         """Add the post hook"""
 851         self._post_hooks.append(ph)
 852
 853     def add_progress_hook(self, ph):
 854         """Add the download progress hook"""
 855         self._progress_hooks.append(ph)
 856
 857     def add_postprocessor_hook(self, ph):
 858         """Add the postprocessing progress hook"""
 859         self._postprocessor_hooks.append(ph)
 860         for pps in self._pps.values():
 861             for pp in pps:
 862                 pp.add_progress_hook(ph)
 863
 864     def _bidi_workaround(self, message):
 865         if not hasattr(self, '_output_channel'):
 866             return message
 867
 868         assert hasattr(self, '_output_process')
 869         assert isinstance(message, str)
 870         line_count = message.count('\n') + 1
 871         self._output_process.stdin.write((message + '\n').encode())
 872         self._output_process.stdin.flush()
 873         res = ''.join(self._output_channel.readline().decode()
 874                       for _ in range(line_count))
 875         return res[:-len('\n')]
 876
 877     def _write_string(self, message, out=None, only_once=False):
 878         if only_once:
 879             if message in self._printed_messages:
 880                 return
 881             self._printed_messages.add(message)
 882         write_string(message, out=out, encoding=self.params.get('encoding'))
 883
 884     def to_stdout(self, message, skip_eol=False, quiet=None):
 885         """Print message to stdout"""
 886         if quiet is not None:
 887             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
 888                                      'Use "YoutubeDL.to_screen" instead')
 889         if skip_eol is not False:
 890             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
 891                                      'Use "YoutubeDL.to_screen" instead')
 892         self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
 893
 894     def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
 895         """Print message to screen if not in quiet mode"""
 896         if self.params.get('logger'):
 897             self.params['logger'].debug(message)
 898             return
 899         if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
 900             return
 901         self._write_string(
 902             '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 903             self._out_files.screen, only_once=only_once)
 904
 905     def to_stderr(self, message, only_once=False):
 906         """Print message to stderr"""
 907         assert isinstance(message, str)
 908         if self.params.get('logger'):
 909             self.params['logger'].error(message)
 910         else:
 911             self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
 912
 913     def _send_console_code(self, code):
 914         if compat_os_name == 'nt' or not self._out_files.console:
 915             return
 916         self._write_string(code, self._out_files.console)
 917
 918     def to_console_title(self, message):
 919         if not self.params.get('consoletitle', False):
 920             return
 921         message = remove_terminal_sequences(message)
 922         if compat_os_name == 'nt':
 923             if ctypes.windll.kernel32.GetConsoleWindow():
 924                 # c_wchar_p() might not be necessary if `message` is
 925                 # already of type unicode()
 926                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 927         else:
 928             self._send_console_code(f'\033]0;{message}\007')
 929
 930     def save_console_title(self):
 931         if not self.params.get('consoletitle') or self.params.get('simulate'):
 932             return
 933         self._send_console_code('\033[22;0t')  # Save the title on stack
 934
 935     def restore_console_title(self):
 936         if not self.params.get('consoletitle') or self.params.get('simulate'):
 937             return
 938         self._send_console_code('\033[23;0t')  # Restore the title from stack
 939
    def __enter__(self):
        """Enter the context: save the console title and return this object"""
        self.save_console_title()
        return self
 943
 944     def save_cookies(self):
 945         if self.params.get('cookiefile') is not None:
 946             self.cookiejar.save()
 947
 948     def __exit__(self, *args):
 949         self.restore_console_title()
 950         self.close()
 951
 952     def close(self):
 953         self.save_cookies()
 954         self._request_director.close()
 955
 956     def trouble(self, message=None, tb=None, is_error=True):
 957         """Determine action to take when a download problem appears.
 958
 959         Depending on if the downloader has been configured to ignore
 960         download errors or not, this method may throw an exception or
 961         not when errors are found, after printing the message.
 962
 963         @param tb          If given, is additional traceback information
 964         @param is_error    Whether to raise error according to ignorerrors
 965         """
 966         if message is not None:
 967             self.to_stderr(message)
 968         if self.params.get('verbose'):
 969             if tb is None:
 970                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 971                     tb = ''
 972                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 973                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 974                     tb += encode_compat_str(traceback.format_exc())
 975                 else:
 976                     tb_data = traceback.format_list(traceback.extract_stack())
 977                     tb = ''.join(tb_data)
 978             if tb:
 979                 self.to_stderr(tb)
 980         if not is_error:
 981             return
 982         if not self.params.get('ignoreerrors'):
 983             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 984                 exc_info = sys.exc_info()[1].exc_info
 985             else:
 986                 exc_info = sys.exc_info()
 987             raise DownloadError(message, exc_info)
 988         self._download_retcode = 1
 989
    # Named text styles. These are given as the format argument of the
    # _format_* methods, e.g. self._format_err("WARNING:", self.Styles.WARNING)
    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        BAD_FORMAT='light red',
        WARNING='yellow',
        SUPPRESS='light black',
    )
1001
1002     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
1003         text = str(text)
1004         if test_encoding:
1005             original_text = text
1006             # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
1007             encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
1008             text = text.encode(encoding, 'ignore').decode(encoding)
1009             if fallback is not None and text != original_text:
1010                 text = fallback
1011         return format_text(text, f) if allow_colors is True else text if fallback is None else fallback
1012
1013     def _format_out(self, *args, **kwargs):
1014         return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
1015
1016     def _format_screen(self, *args, **kwargs):
1017         return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
1018
1019     def _format_err(self, *args, **kwargs):
1020         return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
1021
1022     def report_warning(self, message, only_once=False):
1023         '''
1024         Print the message to stderr, it will be prefixed with 'WARNING:'
1025         If stderr is a tty file the 'WARNING:' will be colored
1026         '''
1027         if self.params.get('logger') is not None:
1028             self.params['logger'].warning(message)
1029         else:
1030             if self.params.get('no_warnings'):
1031                 return
1032             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
1033
1034     def deprecation_warning(self, message, *, stacklevel=0):
1035         deprecation_warning(
1036             message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)
1037
1038     def deprecated_feature(self, message):
1039         if self.params.get('logger') is not None:
1040             self.params['logger'].warning(f'Deprecated Feature: {message}')
1041         self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
1042
1043     def report_error(self, message, *args, **kwargs):
1044         '''
1045         Do the same as trouble, but prefixes the message with 'ERROR:', colored
1046         in red if stderr is a tty file.
1047         '''
1048         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
1049
1050     def write_debug(self, message, only_once=False):
1051         '''Log debug message or Print message to stderr'''
1052         if not self.params.get('verbose', False):
1053             return
1054         message = f'[debug] {message}'
1055         if self.params.get('logger'):
1056             self.params['logger'].debug(message)
1057         else:
1058             self.to_stderr(message, only_once)
1059
1060     def report_file_already_downloaded(self, file_name):
1061         """Report file has already been fully downloaded."""
1062         try:
1063             self.to_screen('[download] %s has already been downloaded' % file_name)
1064         except UnicodeEncodeError:
1065             self.to_screen('[download] The file has already been downloaded')
1066
1067     def report_file_delete(self, file_name):
1068         """Report that existing file will be deleted."""
1069         try:
1070             self.to_screen('Deleting existing file %s' % file_name)
1071         except UnicodeEncodeError:
1072             self.to_screen('Deleting existing file')
1073
1074     def raise_no_formats(self, info, forced=False, *, msg=None):
1075         has_drm = info.get('_has_drm')
1076         ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
1077         msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
1078         if forced or not ignored:
1079             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
1080                                  expected=has_drm or ignored or expected)
1081         else:
1082             self.report_warning(msg)
1083
1084     def parse_outtmpl(self):
1085         self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
1086         self._parse_outtmpl()
1087         return self.params['outtmpl']
1088
1089     def _parse_outtmpl(self):
1090         sanitize = IDENTITY
1091         if self.params.get('restrictfilenames'):  # Remove spaces in the default template
1092             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
1093
1094         outtmpl = self.params.setdefault('outtmpl', {})
1095         if not isinstance(outtmpl, dict):
1096             self.params['outtmpl'] = outtmpl = {'default': outtmpl}
1097         outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})
1098
1099     def get_output_path(self, dir_type='', filename=None):
1100         paths = self.params.get('paths', {})
1101         assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
1102         path = os.path.join(
1103             expand_path(paths.get('home', '').strip()),
1104             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1105             filename or '')
1106         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1107
1108     @staticmethod
1109     def _outtmpl_expandpath(outtmpl):
1110         # expand_path translates '%%' into '%' and '$$' into '$'
1111         # correspondingly that is not what we want since we need to keep
1112         # '%%' intact for template dict substitution step. Working around
1113         # with boundary-alike separator hack.
1114         sep = ''.join(random.choices(string.ascii_letters, k=32))
1115         outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
1116
1117         # outtmpl should be expand_path'ed before template dict substitution
1118         # because meta fields may contain env variables we don't want to
1119         # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
1120         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1121         return expand_path(outtmpl).replace(sep, '')
1122
1123     @staticmethod
1124     def escape_outtmpl(outtmpl):
1125         ''' Escape any remaining strings like %s, %abc% etc. '''
1126         return re.sub(
1127             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1128             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1129             outtmpl)
1130
1131     @classmethod
1132     def validate_outtmpl(cls, outtmpl):
1133         ''' @return None or Exception object '''
1134         outtmpl = re.sub(
1135             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
1136             lambda mobj: f'{mobj.group(0)[:-1]}s',
1137             cls._outtmpl_expandpath(outtmpl))
1138         try:
1139             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1140             return None
1141         except ValueError as err:
1142             return err
1143
1144     @staticmethod
1145     def _copy_infodict(info_dict):
1146         info_dict = dict(info_dict)
1147         info_dict.pop('__postprocessors', None)
1148         info_dict.pop('__pending_error', None)
1149         return info_dict
1150
1151     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1152         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1153         @param sanitize    Whether to sanitize the output as a filename.
1154                            For backward compatibility, a function can also be passed
1155         """
1156
1157         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1158
1159         info_dict = self._copy_infodict(info_dict)
1160         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1161             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1162             if info_dict.get('duration', None) is not None
1163             else None)
1164         info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
1165         info_dict['video_autonumber'] = self._num_videos
1166         if info_dict.get('resolution') is None:
1167             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1168
1169         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1170         # of %(field)s to %(field)0Nd for backward compatibility
1171         field_size_compat_map = {
1172             'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
1173             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1174             'autonumber': self.params.get('autonumber_size') or 5,
1175         }
1176
1177         TMPL_DICT = {}
1178         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
1179         MATH_FUNCTIONS = {
1180             '+': float.__add__,
1181             '-': float.__sub__,
1182         }
1183         # Field is of the form key1.key2...
1184         # where keys (except first) can be string, int, slice or "{field, ...}"
1185         FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
1186         FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
1187             'inner': FIELD_INNER_RE,
1188             'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
1189         }
1190         MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
1191         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1192         INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
1193             (?P<negate>-)?
1194             (?P<fields>{FIELD_RE})
1195             (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
1196             (?:>(?P<strf_format>.+?))?
1197             (?P<remaining>
1198                 (?P<alternate>(?<!\\),[^|&)]+)?
1199                 (?:&(?P<replacement>.*?))?
1200                 (?:\|(?P<default>.*?))?
1201             )$''')
1202
1203         def _traverse_infodict(fields):
1204             fields = [f for x in re.split(r'\.({.+?})\.?', fields)
1205                       for f in ([x] if x.startswith('{') else x.split('.'))]
1206             for i in (0, -1):
1207                 if fields and not fields[i]:
1208                     fields.pop(i)
1209
1210             for i, f in enumerate(fields):
1211                 if not f.startswith('{'):
1212                     continue
1213                 assert f.endswith('}'), f'No closing brace for {f} in {fields}'
1214                 fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
1215
1216             return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
1217
1218         def get_value(mdict):
1219             # Object traversal
1220             value = _traverse_infodict(mdict['fields'])
1221             # Negative
1222             if mdict['negate']:
1223                 value = float_or_none(value)
1224                 if value is not None:
1225                     value *= -1
1226             # Do maths
1227             offset_key = mdict['maths']
1228             if offset_key:
1229                 value = float_or_none(value)
1230                 operator = None
1231                 while offset_key:
1232                     item = re.match(
1233                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1234                         offset_key).group(0)
1235                     offset_key = offset_key[len(item):]
1236                     if operator is None:
1237                         operator = MATH_FUNCTIONS[item]
1238                         continue
1239                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1240                     offset = float_or_none(item)
1241                     if offset is None:
1242                         offset = float_or_none(_traverse_infodict(item))
1243                     try:
1244                         value = operator(value, multiplier * offset)
1245                     except (TypeError, ZeroDivisionError):
1246                         return None
1247                     operator = None
1248             # Datetime formatting
1249             if mdict['strf_format']:
1250                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1251
1252             # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
1253             if sanitize and value == '':
1254                 value = None
1255             return value
1256
1257         na = self.params.get('outtmpl_na_placeholder', 'NA')
1258
1259         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1260             return sanitize_filename(str(value), restricted=restricted, is_id=(
1261                 bool(re.search(r'(^|[_.])id(\.|$)', key))
1262                 if 'filename-sanitization' in self.params['compat_opts']
1263                 else NO_DEFAULT))
1264
1265         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1266         sanitize = bool(sanitize)
1267
1268         def _dumpjson_default(obj):
1269             if isinstance(obj, (set, LazyList)):
1270                 return list(obj)
1271             return repr(obj)
1272
1273         class _ReplacementFormatter(string.Formatter):
1274             def get_field(self, field_name, args, kwargs):
1275                 if field_name.isdigit():
1276                     return args[0], -1
1277                 raise ValueError('Unsupported field')
1278
1279         replacement_formatter = _ReplacementFormatter()
1280
1281         def create_key(outer_mobj):
1282             if not outer_mobj.group('has_key'):
1283                 return outer_mobj.group(0)
1284             key = outer_mobj.group('key')
1285             mobj = re.match(INTERNAL_FORMAT_RE, key)
1286             value, replacement, default, last_field = None, None, na, ''
1287             while mobj:
1288                 mobj = mobj.groupdict()
1289                 default = mobj['default'] if mobj['default'] is not None else default
1290                 value = get_value(mobj)
1291                 last_field, replacement = mobj['fields'], mobj['replacement']
1292                 if value is None and mobj['alternate']:
1293                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
1294                 else:
1295                     break
1296
1297             if None not in (value, replacement):
1298                 try:
1299                     value = replacement_formatter.format(replacement, value)
1300                 except ValueError:
1301                     value, default = None, na
1302
1303             fmt = outer_mobj.group('format')
1304             if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
1305                 fmt = f'0{field_size_compat_map[last_field]:d}d'
1306
1307             flags = outer_mobj.group('conversion') or ''
1308             str_fmt = f'{fmt[:-1]}s'
1309             if value is None:
1310                 value, fmt = default, 's'
1311             elif fmt[-1] == 'l':  # list
1312                 delim = '\n' if '#' in flags else ', '
1313                 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1314             elif fmt[-1] == 'j':  # json
1315                 value, fmt = json.dumps(
1316                     value, default=_dumpjson_default,
1317                     indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
1318             elif fmt[-1] == 'h':  # html
1319                 value, fmt = escapeHTML(str(value)), str_fmt
1320             elif fmt[-1] == 'q':  # quoted
1321                 value = map(str, variadic(value) if '#' in flags else [value])
1322                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1323             elif fmt[-1] == 'B':  # bytes
1324                 value = f'%{str_fmt}'.encode() % str(value).encode()
1325                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1326             elif fmt[-1] == 'U':  # unicode normalized
1327                 value, fmt = unicodedata.normalize(
1328                     # "+" = compatibility equivalence, "#" = NFD
1329                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1330                     value), str_fmt
1331             elif fmt[-1] == 'D':  # decimal suffix
1332                 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1333                 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1334                                               factor=1024 if '#' in flags else 1000)
1335             elif fmt[-1] == 'S':  # filename sanitization
1336                 value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
1337             elif fmt[-1] == 'c':
1338                 if value:
1339                     value = str(value)[0]
1340                 else:
1341                     fmt = str_fmt
1342             elif fmt[-1] not in 'rsa':  # numeric
1343                 value = float_or_none(value)
1344                 if value is None:
1345                     value, fmt = default, 's'
1346
1347             if sanitize:
1348                 # If value is an object, sanitize might convert it to a string
1349                 # So we convert it to repr first
1350                 if fmt[-1] == 'r':
1351                     value, fmt = repr(value), str_fmt
1352                 elif fmt[-1] == 'a':
1353                     value, fmt = ascii(value), str_fmt
1354                 if fmt[-1] in 'csra':
1355                     value = sanitizer(last_field, value)
1356
1357             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1358             TMPL_DICT[key] = value
1359             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1360
1361         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1362
1363     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1364         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1365         return self.escape_outtmpl(outtmpl) % info_dict
1366
1367     def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1368         assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1369         if outtmpl is None:
1370             outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1371         try:
1372             outtmpl = self._outtmpl_expandpath(outtmpl)
1373             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1374             if not filename:
1375                 return None
1376
1377             if tmpl_type in ('', 'temp'):
1378                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1379                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1380                     filename = replace_extension(filename, ext, final_ext)
1381             elif tmpl_type:
1382                 force_ext = OUTTMPL_TYPES[tmpl_type]
1383                 if force_ext:
1384                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1385
1386             # https://github.com/blackjack4494/youtube-dlc/issues/85
1387             trim_file_name = self.params.get('trim_file_name', False)
1388             if trim_file_name:
1389                 no_ext, *ext = filename.rsplit('.', 2)
1390                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1391
1392             return filename
1393         except ValueError as err:
1394             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1395             return None
1396
1397     def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1398         """Generate the output filename"""
1399         if outtmpl:
1400             assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1401             dir_type = None
1402         filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1403         if not filename and dir_type not in ('', 'temp'):
1404             return ''
1405
1406         if warn:
1407             if not self.params.get('paths'):
1408                 pass
1409             elif filename == '-':
1410                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1411             elif os.path.isabs(filename):
1412                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1413         if filename == '-' or not filename:
1414             return filename
1415
1416         return self.get_output_path(dir_type, filename)
1417
1418     def _match_entry(self, info_dict, incomplete=False, silent=False):
1419         """Returns None if the file should be downloaded"""
1420         _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
1421         assert incomplete or _type == 'video', 'Only video result can be considered complete'
1422
1423         video_title = info_dict.get('title', info_dict.get('id', 'entry'))
1424
1425         def check_filter():
1426             if _type in ('playlist', 'multi_video'):
1427                 return
1428             elif _type in ('url', 'url_transparent') and not try_call(
1429                     lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1430                 return
1431
1432             if 'title' in info_dict:
1433                 # This can happen when we're just evaluating the playlist
1434                 title = info_dict['title']
1435                 matchtitle = self.params.get('matchtitle', False)
1436                 if matchtitle:
1437                     if not re.search(matchtitle, title, re.IGNORECASE):
1438                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1439                 rejecttitle = self.params.get('rejecttitle', False)
1440                 if rejecttitle:
1441                     if re.search(rejecttitle, title, re.IGNORECASE):
1442                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1443
1444             date = info_dict.get('upload_date')
1445             if date is not None:
1446                 dateRange = self.params.get('daterange', DateRange())
1447                 if date not in dateRange:
1448                     return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1449             view_count = info_dict.get('view_count')
1450             if view_count is not None:
1451                 min_views = self.params.get('min_views')
1452                 if min_views is not None and view_count < min_views:
1453                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1454                 max_views = self.params.get('max_views')
1455                 if max_views is not None and view_count > max_views:
1456                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1457             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1458                 return 'Skipping "%s" because it is age restricted' % video_title
1459
1460             match_filter = self.params.get('match_filter')
1461             if match_filter is None:
1462                 return None
1463
1464             cancelled = None
1465             try:
1466                 try:
1467                     ret = match_filter(info_dict, incomplete=incomplete)
1468                 except TypeError:
1469                     # For backward compatibility
1470                     ret = None if incomplete else match_filter(info_dict)
1471             except DownloadCancelled as err:
1472                 if err.msg is not NO_DEFAULT:
1473                     raise
1474                 ret, cancelled = err.msg, err
1475
1476             if ret is NO_DEFAULT:
1477                 while True:
1478                     filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1479                     reply = input(self._format_screen(
1480                         f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1481                     if reply in {'y', ''}:
1482                         return None
1483                     elif reply == 'n':
1484                         if cancelled:
1485                             raise type(cancelled)(f'Skipping {video_title}')
1486                         return f'Skipping {video_title}'
1487             return ret
1488
1489         if self.in_download_archive(info_dict):
1490             reason = ''.join((
1491                 format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
1492                 format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
1493                 'has already been recorded in the archive'))
1494             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1495         else:
1496             try:
1497                 reason = check_filter()
1498             except DownloadCancelled as e:
1499                 reason, break_opt, break_err = e.msg, 'match_filter', type(e)
1500             else:
1501                 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1502         if reason is not None:
1503             if not silent:
1504                 self.to_screen('[download] ' + reason)
1505             if self.params.get(break_opt, False):
1506                 raise break_err()
1507         return reason
1508
1509     @staticmethod
1510     def add_extra_info(info_dict, extra_info):
1511         '''Set the keys from extra_info in info dict if they are missing'''
1512         for key, value in extra_info.items():
1513             info_dict.setdefault(key, value)
1514
1515     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1516                      process=True, force_generic_extractor=False):
1517         """
1518         Extract and return the information dictionary of the URL
1519
1520         Arguments:
1521         @param url          URL to extract
1522
1523         Keyword arguments:
1524         @param download     Whether to download videos
1525         @param process      Whether to resolve all unresolved references (URLs, playlist items).
1526                             Must be True for download to work
1527         @param ie_key       Use only the extractor with this key
1528
1529         @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
1530         @force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')
1531         """
1532
1533         if extra_info is None:
1534             extra_info = {}
1535
1536         if not ie_key and force_generic_extractor:
1537             ie_key = 'Generic'
1538
1539         if ie_key:
1540             ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1541         else:
1542             ies = self._ies
1543
1544         for key, ie in ies.items():
1545             if not ie.suitable(url):
1546                 continue
1547
1548             if not ie.working():
1549                 self.report_warning('The program functionality for this site has been marked as broken, '
1550                                     'and will probably not work.')
1551
1552             temp_id = ie.get_temp_id(url)
1553             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1554                 self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
1555                                'has already been recorded in the archive')
1556                 if self.params.get('break_on_existing', False):
1557                     raise ExistingVideoReached()
1558                 break
1559             return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1560         else:
1561             extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1562             self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1563                               tb=False if extractors_restricted else None)
1564
1565     def _handle_extraction_exceptions(func):
1566         @functools.wraps(func)
1567         def wrapper(self, *args, **kwargs):
1568             while True:
1569                 try:
1570                     return func(self, *args, **kwargs)
1571                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1572                     raise
1573                 except ReExtractInfo as e:
1574                     if e.expected:
1575                         self.to_screen(f'{e}; Re-extracting data')
1576                     else:
1577                         self.to_stderr('\r')
1578                         self.report_warning(f'{e}; Re-extracting data')
1579                     continue
1580                 except GeoRestrictedError as e:
1581                     msg = e.msg
1582                     if e.countries:
1583                         msg += '\nThis video is available in %s.' % ', '.join(
1584                             map(ISO3166Utils.short2full, e.countries))
1585                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1586                     self.report_error(msg)
1587                 except ExtractorError as e:  # An error we somewhat expected
1588                     self.report_error(str(e), e.format_traceback())
1589                 except Exception as e:
1590                     if self.params.get('ignoreerrors'):
1591                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1592                     else:
1593                         raise
1594                 break
1595         return wrapper
1596
1597     def _wait_for_video(self, ie_result={}):
1598         if (not self.params.get('wait_for_video')
1599                 or ie_result.get('_type', 'video') != 'video'
1600                 or ie_result.get('formats') or ie_result.get('url')):
1601             return
1602
1603         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1604         last_msg = ''
1605
1606         def progress(msg):
1607             nonlocal last_msg
1608             full_msg = f'{msg}\n'
1609             if not self.params.get('noprogress'):
1610                 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1611             elif last_msg:
1612                 return
1613             self.to_screen(full_msg, skip_eol=True)
1614             last_msg = msg
1615
1616         min_wait, max_wait = self.params.get('wait_for_video')
1617         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1618         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1619             diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1620             self.report_warning('Release time of video is not known')
1621         elif ie_result and (diff or 0) <= 0:
1622             self.report_warning('Video should already be available according to extracted info')
1623         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1624         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1625
1626         wait_till = time.time() + diff
1627         try:
1628             while True:
1629                 diff = wait_till - time.time()
1630                 if diff <= 0:
1631                     progress('')
1632                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1633                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1634                 time.sleep(1)
1635         except KeyboardInterrupt:
1636             progress('')
1637             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1638         except BaseException as e:
1639             if not isinstance(e, ReExtractInfo):
1640                 self.to_screen('')
1641             raise
1642
1643     def _load_cookies(self, data, *, autoscope=True):
1644         """Loads cookies from a `Cookie` header
1645
1646         This tries to work around the security vulnerability of passing cookies to every domain.
1647         See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1648
1649         @param data         The Cookie header as string to load the cookies from
1650         @param autoscope    If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
1651                             If `True`, save cookies for later to be stored in the jar with a limited scope
1652                             If a URL, save cookies in the jar with the domain of the URL
1653         """
1654         for cookie in LenientSimpleCookie(data).values():
1655             if autoscope and any(cookie.values()):
1656                 raise ValueError('Invalid syntax in Cookie Header')
1657
1658             domain = cookie.get('domain') or ''
1659             expiry = cookie.get('expires')
1660             if expiry == '':  # 0 is valid
1661                 expiry = None
1662             prepared_cookie = http.cookiejar.Cookie(
1663                 cookie.get('version') or 0, cookie.key, cookie.value, None, False,
1664                 domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
1665                 cookie.get('secure') or False, expiry, False, None, None, {})
1666
1667             if domain:
1668                 self.cookiejar.set_cookie(prepared_cookie)
1669             elif autoscope is True:
1670                 self.deprecated_feature(
1671                     'Passing cookies as a header is a potential security risk; '
1672                     'they will be scoped to the domain of the downloaded urls. '
1673                     'Please consider loading cookies from a file or browser instead.')
1674                 self.__header_cookies.append(prepared_cookie)
1675             elif autoscope:
1676                 self.report_warning(
1677                     'The extractor result contains an unscoped cookie as an HTTP header. '
1678                     f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
1679                     only_once=True)
1680                 self._apply_header_cookies(autoscope, [prepared_cookie])
1681             else:
1682                 self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1683                                   tb=False, is_error=False)
1684
1685     def _apply_header_cookies(self, url, cookies=None):
1686         """Applies stray header cookies to the provided url
1687
1688         This loads header cookies and scopes them to the domain provided in `url`.
1689         While this is not ideal, it helps reduce the risk of them being sent
1690         to an unintended destination while mostly maintaining compatibility.
1691         """
1692         parsed = urllib.parse.urlparse(url)
1693         if not parsed.hostname:
1694             return
1695
1696         for cookie in map(copy.copy, cookies or self.__header_cookies):
1697             cookie.domain = f'.{parsed.hostname}'
1698             self.cookiejar.set_cookie(cookie)
1699
1700     @_handle_extraction_exceptions
1701     def __extract_info(self, url, ie, download, extra_info, process):
1702         self._apply_header_cookies(url)
1703
1704         try:
1705             ie_result = ie.extract(url)
1706         except UserNotLive as e:
1707             if process:
1708                 if self.params.get('wait_for_video'):
1709                     self.report_warning(e)
1710                 self._wait_for_video()
1711             raise
1712         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1713             self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1714             return
1715         if isinstance(ie_result, list):
1716             # Backwards compatibility: old IE result format
1717             ie_result = {
1718                 '_type': 'compat_list',
1719                 'entries': ie_result,
1720             }
1721         if extra_info.get('original_url'):
1722             ie_result.setdefault('original_url', extra_info['original_url'])
1723         self.add_default_extra_info(ie_result, ie, url)
1724         if process:
1725             self._wait_for_video(ie_result)
1726             return self.process_ie_result(ie_result, download, extra_info)
1727         else:
1728             return ie_result
1729
1730     def add_default_extra_info(self, ie_result, ie, url):
1731         if url is not None:
1732             self.add_extra_info(ie_result, {
1733                 'webpage_url': url,
1734                 'original_url': url,
1735             })
1736         webpage_url = ie_result.get('webpage_url')
1737         if webpage_url:
1738             self.add_extra_info(ie_result, {
1739                 'webpage_url_basename': url_basename(webpage_url),
1740                 'webpage_url_domain': get_domain(webpage_url),
1741             })
1742         if ie is not None:
1743             self.add_extra_info(ie_result, {
1744                 'extractor': ie.IE_NAME,
1745                 'extractor_key': ie.ie_key(),
1746             })
1747
1748     def process_ie_result(self, ie_result, download=True, extra_info=None):
1749         """
1750         Take the result of the ie(may be modified) and resolve all unresolved
1751         references (URLs, playlist items).
1752
1753         It will also download the videos if 'download'.
1754         Returns the resolved ie_result.
1755         """
1756         if extra_info is None:
1757             extra_info = {}
1758         result_type = ie_result.get('_type', 'video')
1759
1760         if result_type in ('url', 'url_transparent'):
1761             ie_result['url'] = sanitize_url(
1762                 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1763             if ie_result.get('original_url') and not extra_info.get('original_url'):
1764                 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1765
1766             extract_flat = self.params.get('extract_flat', False)
1767             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1768                     or extract_flat is True):
1769                 info_copy = ie_result.copy()
1770                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1771                 if ie and not ie_result.get('id'):
1772                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1773                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1774                 self.add_extra_info(info_copy, extra_info)
1775                 info_copy, _ = self.pre_process(info_copy)
1776                 self._fill_common_fields(info_copy, False)
1777                 self.__forced_printings(info_copy)
1778                 self._raise_pending_errors(info_copy)
1779                 if self.params.get('force_write_download_archive', False):
1780                     self.record_download_archive(info_copy)
1781                 return ie_result
1782
1783         if result_type == 'video':
1784             self.add_extra_info(ie_result, extra_info)
1785             ie_result = self.process_video_result(ie_result, download=download)
1786             self._raise_pending_errors(ie_result)
1787             additional_urls = (ie_result or {}).get('additional_urls')
1788             if additional_urls:
1789                 # TODO: Improve MetadataParserPP to allow setting a list
1790                 if isinstance(additional_urls, str):
1791                     additional_urls = [additional_urls]
1792                 self.to_screen(
1793                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1794                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1795                 ie_result['additional_entries'] = [
1796                     self.extract_info(
1797                         url, download, extra_info=extra_info,
1798                         force_generic_extractor=self.params.get('force_generic_extractor'))
1799                     for url in additional_urls
1800                 ]
1801             return ie_result
1802         elif result_type == 'url':
1803             # We have to add extra_info to the results because it may be
1804             # contained in a playlist
1805             return self.extract_info(
1806                 ie_result['url'], download,
1807                 ie_key=ie_result.get('ie_key'),
1808                 extra_info=extra_info)
1809         elif result_type == 'url_transparent':
1810             # Use the information from the embedding page
1811             info = self.extract_info(
1812                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1813                 extra_info=extra_info, download=False, process=False)
1814
1815             # extract_info may return None when ignoreerrors is enabled and
1816             # extraction failed with an error, don't crash and return early
1817             # in this case
1818             if not info:
1819                 return info
1820
1821             exempted_fields = {'_type', 'url', 'ie_key'}
1822             if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1823                 # For video clips, the id etc of the clip extractor should be used
1824                 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1825
1826             new_result = info.copy()
1827             new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1828
1829             # Extracted info may not be a video result (i.e.
1830             # info.get('_type', 'video') != video) but rather an url or
1831             # url_transparent. In such cases outer metadata (from ie_result)
1832             # should be propagated to inner one (info). For this to happen
1833             # _type of info should be overridden with url_transparent. This
1834             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1835             if new_result.get('_type') == 'url':
1836                 new_result['_type'] = 'url_transparent'
1837
1838             return self.process_ie_result(
1839                 new_result, download=download, extra_info=extra_info)
1840         elif result_type in ('playlist', 'multi_video'):
1841             # Protect from infinite recursion due to recursively nested playlists
1842             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1843             webpage_url = ie_result.get('webpage_url')  # Playlists maynot have webpage_url
1844             if webpage_url and webpage_url in self._playlist_urls:
1845                 self.to_screen(
1846                     '[download] Skipping already downloaded playlist: %s'
1847                     % ie_result.get('title') or ie_result.get('id'))
1848                 return
1849
1850             self._playlist_level += 1
1851             self._playlist_urls.add(webpage_url)
1852             self._fill_common_fields(ie_result, False)
1853             self._sanitize_thumbnails(ie_result)
1854             try:
1855                 return self.__process_playlist(ie_result, download)
1856             finally:
1857                 self._playlist_level -= 1
1858                 if not self._playlist_level:
1859                     self._playlist_urls.clear()
1860         elif result_type == 'compat_list':
1861             self.report_warning(
1862                 'Extractor %s returned a compat_list result. '
1863                 'It needs to be updated.' % ie_result.get('extractor'))
1864
1865             def _fixup(r):
1866                 self.add_extra_info(r, {
1867                     'extractor': ie_result['extractor'],
1868                     'webpage_url': ie_result['webpage_url'],
1869                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1870                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1871                     'extractor_key': ie_result['extractor_key'],
1872                 })
1873                 return r
1874             ie_result['entries'] = [
1875                 self.process_ie_result(_fixup(r), download, extra_info)
1876                 for r in ie_result['entries']
1877             ]
1878             return ie_result
1879         else:
1880             raise Exception('Invalid result type: %s' % result_type)
1881
1882     def _ensure_dir_exists(self, path):
1883         return make_dir(path, self.report_error)
1884
1885     @staticmethod
1886     def _playlist_infodict(ie_result, strict=False, **kwargs):
1887         info = {
1888             'playlist_count': ie_result.get('playlist_count'),
1889             'playlist': ie_result.get('title') or ie_result.get('id'),
1890             'playlist_id': ie_result.get('id'),
1891             'playlist_title': ie_result.get('title'),
1892             'playlist_uploader': ie_result.get('uploader'),
1893             'playlist_uploader_id': ie_result.get('uploader_id'),
1894             **kwargs,
1895         }
1896         if strict:
1897             return info
1898         if ie_result.get('webpage_url'):
1899             info.update({
1900                 'webpage_url': ie_result['webpage_url'],
1901                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1902                 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1903             })
1904         return {
1905             **info,
1906             'playlist_index': 0,
1907             '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1908             'extractor': ie_result['extractor'],
1909             'extractor_key': ie_result['extractor_key'],
1910         }
1911
    def __process_playlist(self, ie_result, download):
        """Process each entry in the playlist

        Resolves the requested entries (all at once, or one at a time when the
        'lazy_playlist' param is set), optionally writes the playlist-level files
        (info json, description, thumbnails), processes every entry through
        __process_iterable_entry and stores the processed data back into ie_result
        before running the 'playlist' postprocessors.

        @param ie_result  Info dict with '_type' of 'playlist' or 'multi_video'.
                          It is modified in place ('entries', 'requested_entries',
                          'playlist_count')
        @param download   Passed through to the processing of each entry
        @returns          The result of run_all_pps('playlist', ie_result), or None if
                          the playlist is rejected by _match_entry or if writing the
                          playlist info json / description returns None
        """
        assert ie_result['_type'] in ('playlist', 'multi_video')

        common_info = self._playlist_infodict(ie_result, strict=True)
        title = common_info.get('playlist') or '<Untitled>'
        # Anything other than None from _match_entry means the whole playlist is skipped
        if self._match_entry(common_info, incomplete=True) is not None:
            return
        self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

        all_entries = PlaylistEntries(self, ie_result)
        entries = orderedSet(all_entries.get_requested_items(), lazy=True)

        lazy = self.params.get('lazy_playlist')
        if lazy:
            # Entries get appended to resolved_entries inside the loop below,
            # so the number of entries is not known at this point
            resolved_entries, n_entries = [], 'N/A'
            ie_result['requested_entries'], ie_result['entries'] = None, None
        else:
            # Resolve everything up front. Note that `entries` and `resolved_entries`
            # are the same list object, so reversing/shuffling `entries` below
            # reorders `resolved_entries` as well
            entries = resolved_entries = list(entries)
            n_entries = len(resolved_entries)
            # Split the (index, entry) pairs into two parallel sequences
            ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
        if not ie_result.get('playlist_count'):
            # Better to do this after potentially exhausting entries
            ie_result['playlist_count'] = all_entries.get_full_count()

        extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
        # Lookups hit ie_result first and fall back to the derived playlist fields
        ie_copy = collections.ChainMap(ie_result, extra)

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            # A None result from the writers aborts processing of this playlist
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if lazy:
            if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
                self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
        elif self.params.get('playlistreverse'):
            entries.reverse()
        elif self.params.get('playlistrandom'):
            random.shuffle(entries)

        self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
                       f'{format_field(ie_result, "playlist_count", " of %s")}')

        # When this is False, the processed results are not stored back into
        # resolved_entries; the originally resolved entries stay in place
        keep_resolved_entries = self.params.get('extract_flat') != 'discard'
        if self.params.get('extract_flat') == 'discard_in_playlist':
            keep_resolved_entries = ie_result['_type'] != 'playlist'
        if keep_resolved_entries:
            self.write_debug('The information of all playlist entries will be held in memory')

        failures = 0
        # No limit when 'skip_playlist_after_errors' is unset (or otherwise falsy)
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, (playlist_index, entry) in enumerate(entries):
            if lazy:
                resolved_entries.append((playlist_index, entry))
            # Falsy entries stay in resolved_entries but are not processed
            if not entry:
                continue

            entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
            if not lazy and 'playlist-index' in self.params['compat_opts']:
                # With this compat option the index is taken by loop position from
                # the originally requested indices instead of from the entry's own pair
                playlist_index = ie_result['requested_entries'][i]

            # View of the entry with the playlist fields as fallback, used only for filtering
            entry_copy = collections.ChainMap(entry, {
                **common_info,
                'n_entries': int_or_none(n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            })

            if self._match_entry(entry_copy, incomplete=True) is not None:
                # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
                # NO_DEFAULT marks the entry as filtered out; such entries are dropped
                # from 'entries' and 'requested_entries' after the loop
                resolved_entries[i] = (playlist_index, NO_DEFAULT)
                continue

            self.to_screen('[download] Downloading item %s of %s' % (
                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

            entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            }, extra))
            # A falsy result from processing the entry counts as a failure
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
                break
            if keep_resolved_entries:
                resolved_entries[i] = (playlist_index, entry_result)

        # Update with processed data
        ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
        ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
        if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
            # Do not set for full playlist
            ie_result.pop('requested_entries')

        # Write the updated info to json
        # (only if the first write above returned True; a None result aborts here too)
        if _infojson_written is True and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {title}')
        return ie_result
2028
2029     @_handle_extraction_exceptions
2030     def __process_iterable_entry(self, entry, download, extra_info):
2031         return self.process_ie_result(
2032             entry, download=download, extra_info=extra_info)
2033
2034     def _build_format_filter(self, filter_spec):
2035         " Returns a function to filter the formats according to the filter_spec "
2036
2037         OPERATORS = {
2038             '<': operator.lt,
2039             '<=': operator.le,
2040             '>': operator.gt,
2041             '>=': operator.ge,
2042             '=': operator.eq,
2043             '!=': operator.ne,
2044         }
2045         operator_rex = re.compile(r'''(?x)\s*
2046             (?P<key>[\w.-]+)\s*
2047             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
2048             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2049             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
2050         m = operator_rex.fullmatch(filter_spec)
2051         if m:
2052             try:
2053                 comparison_value = int(m.group('value'))
2054             except ValueError:
2055                 comparison_value = parse_filesize(m.group('value'))
2056                 if comparison_value is None:
2057                     comparison_value = parse_filesize(m.group('value') + 'B')
2058                 if comparison_value is None:
2059                     raise ValueError(
2060                         'Invalid value %r in format specification %r' % (
2061                             m.group('value'), filter_spec))
2062             op = OPERATORS[m.group('op')]
2063
2064         if not m:
2065             STR_OPERATORS = {
2066                 '=': operator.eq,
2067                 '^=': lambda attr, value: attr.startswith(value),
2068                 '$=': lambda attr, value: attr.endswith(value),
2069                 '*=': lambda attr, value: value in attr,
2070                 '~=': lambda attr, value: value.search(attr) is not None
2071             }
2072             str_operator_rex = re.compile(r'''(?x)\s*
2073                 (?P<key>[a-zA-Z0-9._-]+)\s*
2074                 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
2075                 (?P<quote>["'])?
2076                 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2077                 (?(quote)(?P=quote))\s*
2078                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
2079             m = str_operator_rex.fullmatch(filter_spec)
2080             if m:
2081                 if m.group('op') == '~=':
2082                     comparison_value = re.compile(m.group('value'))
2083                 else:
2084                     comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2085                 str_op = STR_OPERATORS[m.group('op')]
2086                 if m.group('negation'):
2087                     op = lambda attr, value: not str_op(attr, value)
2088                 else:
2089                     op = str_op
2090
2091         if not m:
2092             raise SyntaxError('Invalid filter specification %r' % filter_spec)
2093
2094         def _filter(f):
2095             actual_value = f.get(m.group('key'))
2096             if actual_value is None:
2097                 return m.group('none_inclusive')
2098             return op(actual_value, comparison_value)
2099         return _filter
2100
2101     def _check_formats(self, formats):
2102         for f in formats:
2103             self.to_screen('[info] Testing format %s' % f['format_id'])
2104             path = self.get_output_path('temp')
2105             if not self._ensure_dir_exists(f'{path}/'):
2106                 continue
2107             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2108             temp_file.close()
2109             try:
2110                 success, _ = self.dl(temp_file.name, f, test=True)
2111             except (DownloadError, OSError, ValueError) + network_exceptions:
2112                 success = False
2113             finally:
2114                 if os.path.exists(temp_file.name):
2115                     try:
2116                         os.remove(temp_file.name)
2117                     except OSError:
2118                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2119             if success:
2120                 yield f
2121             else:
2122                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2123
2124     def _default_format_spec(self, info_dict, download=True):
2125
2126         def can_merge():
2127             merger = FFmpegMergerPP(self)
2128             return merger.available and merger.can_merge()
2129
2130         prefer_best = (
2131             not self.params.get('simulate')
2132             and download
2133             and (
2134                 not can_merge()
2135                 or info_dict.get('is_live') and not self.params.get('live_from_start')
2136                 or self.params['outtmpl']['default'] == '-'))
2137         compat = (
2138             prefer_best
2139             or self.params.get('allow_multiple_audio_streams', False)
2140             or 'format-spec' in self.params['compat_opts'])
2141
2142         return (
2143             'best/bestvideo+bestaudio' if prefer_best
2144             else 'bestvideo*+bestaudio/best' if not compat
2145             else 'bestvideo+bestaudio/best')
2146
2147     def build_format_selector(self, format_spec):
2148         def syntax_error(note, start):
2149             message = (
2150                 'Invalid format specification: '
2151                 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2152             return SyntaxError(message)
2153
2154         PICKFIRST = 'PICKFIRST'
2155         MERGE = 'MERGE'
2156         SINGLE = 'SINGLE'
2157         GROUP = 'GROUP'
2158         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2159
2160         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2161                                   'video': self.params.get('allow_multiple_video_streams', False)}
2162
2163         def _parse_filter(tokens):
2164             filter_parts = []
2165             for type, string_, start, _, _ in tokens:
2166                 if type == tokenize.OP and string_ == ']':
2167                     return ''.join(filter_parts)
2168                 else:
2169                     filter_parts.append(string_)
2170
2171         def _remove_unused_ops(tokens):
2172             # Remove operators that we don't use and join them with the surrounding strings.
2173             # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2174             ALLOWED_OPS = ('/', '+', ',', '(', ')')
2175             last_string, last_start, last_end, last_line = None, None, None, None
2176             for type, string_, start, end, line in tokens:
2177                 if type == tokenize.OP and string_ == '[':
2178                     if last_string:
2179                         yield tokenize.NAME, last_string, last_start, last_end, last_line
2180                         last_string = None
2181                     yield type, string_, start, end, line
2182                     # everything inside brackets will be handled by _parse_filter
2183                     for type, string_, start, end, line in tokens:
2184                         yield type, string_, start, end, line
2185                         if type == tokenize.OP and string_ == ']':
2186                             break
2187                 elif type == tokenize.OP and string_ in ALLOWED_OPS:
2188                     if last_string:
2189                         yield tokenize.NAME, last_string, last_start, last_end, last_line
2190                         last_string = None
2191                     yield type, string_, start, end, line
2192                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2193                     if not last_string:
2194                         last_string = string_
2195                         last_start = start
2196                         last_end = end
2197                     else:
2198                         last_string += string_
2199             if last_string:
2200                 yield tokenize.NAME, last_string, last_start, last_end, last_line
2201
2202         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2203             selectors = []
2204             current_selector = None
2205             for type, string_, start, _, _ in tokens:
2206                 # ENCODING is only defined in python 3.x
2207                 if type == getattr(tokenize, 'ENCODING', None):
2208                     continue
2209                 elif type in [tokenize.NAME, tokenize.NUMBER]:
2210                     current_selector = FormatSelector(SINGLE, string_, [])
2211                 elif type == tokenize.OP:
2212                     if string_ == ')':
2213                         if not inside_group:
2214                             # ')' will be handled by the parentheses group
2215                             tokens.restore_last_token()
2216                         break
2217                     elif inside_merge and string_ in ['/', ',']:
2218                         tokens.restore_last_token()
2219                         break
2220                     elif inside_choice and string_ == ',':
2221                         tokens.restore_last_token()
2222                         break
2223                     elif string_ == ',':
2224                         if not current_selector:
2225                             raise syntax_error('"," must follow a format selector', start)
2226                         selectors.append(current_selector)
2227                         current_selector = None
2228                     elif string_ == '/':
2229                         if not current_selector:
2230                             raise syntax_error('"/" must follow a format selector', start)
2231                         first_choice = current_selector
2232                         second_choice = _parse_format_selection(tokens, inside_choice=True)
2233                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2234                     elif string_ == '[':
2235                         if not current_selector:
2236                             current_selector = FormatSelector(SINGLE, 'best', [])
2237                         format_filter = _parse_filter(tokens)
2238                         current_selector.filters.append(format_filter)
2239                     elif string_ == '(':
2240                         if current_selector:
2241                             raise syntax_error('Unexpected "("', start)
2242                         group = _parse_format_selection(tokens, inside_group=True)
2243                         current_selector = FormatSelector(GROUP, group, [])
2244                     elif string_ == '+':
2245                         if not current_selector:
2246                             raise syntax_error('Unexpected "+"', start)
2247                         selector_1 = current_selector
2248                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2249                         if not selector_2:
2250                             raise syntax_error('Expected a selector', start)
2251                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2252                     else:
2253                         raise syntax_error(f'Operator not recognized: "{string_}"', start)
2254                 elif type == tokenize.ENDMARKER:
2255                     break
2256             if current_selector:
2257                 selectors.append(current_selector)
2258             return selectors
2259
2260         def _merge(formats_pair):
2261             format_1, format_2 = formats_pair
2262
2263             formats_info = []
2264             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2265             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2266
2267             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2268                 get_no_more = {'video': False, 'audio': False}
2269                 for (i, fmt_info) in enumerate(formats_info):
2270                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2271                         formats_info.pop(i)
2272                         continue
2273                     for aud_vid in ['audio', 'video']:
2274                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2275                             if get_no_more[aud_vid]:
2276                                 formats_info.pop(i)
2277                                 break
2278                             get_no_more[aud_vid] = True
2279
2280             if len(formats_info) == 1:
2281                 return formats_info[0]
2282
2283             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2284             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2285
2286             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2287             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2288
2289             output_ext = get_compatible_ext(
2290                 vcodecs=[f.get('vcodec') for f in video_fmts],
2291                 acodecs=[f.get('acodec') for f in audio_fmts],
2292                 vexts=[f['ext'] for f in video_fmts],
2293                 aexts=[f['ext'] for f in audio_fmts],
2294                 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2295                              or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2296
2297             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2298
2299             new_dict = {
2300                 'requested_formats': formats_info,
2301                 'format': '+'.join(filtered('format')),
2302                 'format_id': '+'.join(filtered('format_id')),
2303                 'ext': output_ext,
2304                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2305                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2306                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2307                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2308                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2309             }
2310
2311             if the_only_video:
2312                 new_dict.update({
2313                     'width': the_only_video.get('width'),
2314                     'height': the_only_video.get('height'),
2315                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2316                     'fps': the_only_video.get('fps'),
2317                     'dynamic_range': the_only_video.get('dynamic_range'),
2318                     'vcodec': the_only_video.get('vcodec'),
2319                     'vbr': the_only_video.get('vbr'),
2320                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2321                     'aspect_ratio': the_only_video.get('aspect_ratio'),
2322                 })
2323
2324             if the_only_audio:
2325                 new_dict.update({
2326                     'acodec': the_only_audio.get('acodec'),
2327                     'abr': the_only_audio.get('abr'),
2328                     'asr': the_only_audio.get('asr'),
2329                     'audio_channels': the_only_audio.get('audio_channels')
2330                 })
2331
2332             return new_dict
2333
2334         def _check_formats(formats):
2335             if self.params.get('check_formats') == 'selected':
2336                 yield from self._check_formats(formats)
2337                 return
2338             elif (self.params.get('check_formats') is not None
2339                     or self.params.get('allow_unplayable_formats')):
2340                 yield from formats
2341                 return
2342
2343             for f in formats:
2344                 if f.get('has_drm') or f.get('__needs_testing'):
2345                     yield from self._check_formats([f])
2346                 else:
2347                     yield f
2348
2349         def _build_selector_function(selector):
2350             if isinstance(selector, list):  # ,
2351                 fs = [_build_selector_function(s) for s in selector]
2352
2353                 def selector_function(ctx):
2354                     for f in fs:
2355                         yield from f(ctx)
2356                 return selector_function
2357
2358             elif selector.type == GROUP:  # ()
2359                 selector_function = _build_selector_function(selector.selector)
2360
2361             elif selector.type == PICKFIRST:  # /
2362                 fs = [_build_selector_function(s) for s in selector.selector]
2363
2364                 def selector_function(ctx):
2365                     for f in fs:
2366                         picked_formats = list(f(ctx))
2367                         if picked_formats:
2368                             return picked_formats
2369                     return []
2370
2371             elif selector.type == MERGE:  # +
2372                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2373
2374                 def selector_function(ctx):
2375                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2376                         yield _merge(pair)
2377
2378             elif selector.type == SINGLE:  # atom
2379                 format_spec = selector.selector or 'best'
2380
2381                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2382                 if format_spec == 'all':
2383                     def selector_function(ctx):
2384                         yield from _check_formats(ctx['formats'][::-1])
2385                 elif format_spec == 'mergeall':
2386                     def selector_function(ctx):
2387                         formats = list(_check_formats(
2388                             f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2389                         if not formats:
2390                             return
2391                         merged_format = formats[-1]
2392                         for f in formats[-2::-1]:
2393                             merged_format = _merge((merged_format, f))
2394                         yield merged_format
2395
2396                 else:
2397                     format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
2398                     mobj = re.match(
2399                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2400                         format_spec)
2401                     if mobj is not None:
2402                         format_idx = int_or_none(mobj.group('n'), default=1)
2403                         format_reverse = mobj.group('bw')[0] == 'b'
2404                         format_type = (mobj.group('type') or [None])[0]
2405                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2406                         format_modified = mobj.group('mod') is not None
2407
2408                         format_fallback = not format_type and not format_modified  # for b, w
2409                         _filter_f = (
2410                             (lambda f: f.get('%scodec' % format_type) != 'none')
2411                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2412                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2413                             if format_type  # bv, ba, wv, wa
2414                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2415                             if not format_modified  # b, w
2416                             else lambda f: True)  # b*, w*
2417                         filter_f = lambda f: _filter_f(f) and (
2418                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2419                     else:
2420                         if format_spec in self._format_selection_exts['audio']:
2421                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2422                         elif format_spec in self._format_selection_exts['video']:
2423                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2424                             seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2425                         elif format_spec in self._format_selection_exts['storyboards']:
2426                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2427                         else:
2428                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2429
2430                     def selector_function(ctx):
2431                         formats = list(ctx['formats'])
2432                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2433                         if not matches:
2434                             if format_fallback and ctx['incomplete_formats']:
2435                                 # for extractors with incomplete formats (audio only (soundcloud)
2436                                 # or video only (imgur)) best/worst will fallback to
2437                                 # best/worst {video,audio}-only format
2438                                 matches = formats
2439                             elif seperate_fallback and not ctx['has_merged_format']:
2440                                 # for compatibility with youtube-dl when there is no pre-merged format
2441                                 matches = list(filter(seperate_fallback, formats))
2442                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2443                         try:
2444                             yield matches[format_idx - 1]
2445                         except LazyList.IndexError:
2446                             return
2447
2448             filters = [self._build_format_filter(f) for f in selector.filters]
2449
2450             def final_selector(ctx):
2451                 ctx_copy = dict(ctx)
2452                 for _filter in filters:
2453                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2454                 return selector_function(ctx_copy)
2455             return final_selector
2456
2457         stream = io.BytesIO(format_spec.encode())
2458         try:
2459             tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2460         except tokenize.TokenError:
2461             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2462
2463         class TokenIterator:
2464             def __init__(self, tokens):
2465                 self.tokens = tokens
2466                 self.counter = 0
2467
2468             def __iter__(self):
2469                 return self
2470
2471             def __next__(self):
2472                 if self.counter >= len(self.tokens):
2473                     raise StopIteration()
2474                 value = self.tokens[self.counter]
2475                 self.counter += 1
2476                 return value
2477
2478             next = __next__
2479
2480             def restore_last_token(self):
2481                 self.counter -= 1
2482
2483         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2484         return _build_selector_function(parsed_selector)
2485
2486     def _calc_headers(self, info_dict, load_cookies=False):
2487         res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
2488         clean_headers(res)
2489
2490         if load_cookies:  # For --load-info-json
2491             self._load_cookies(res.get('Cookie'), autoscope=info_dict['url'])  # compat
2492             self._load_cookies(info_dict.get('cookies'), autoscope=False)
2493         # The `Cookie` header is removed to prevent leaks and unscoped cookies.
2494         # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
2495         res.pop('Cookie', None)
2496         cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
2497         if cookies:
2498             encoder = LenientSimpleCookie()
2499             values = []
2500             for cookie in cookies:
2501                 _, value = encoder.value_encode(cookie.value)
2502                 values.append(f'{cookie.name}={value}')
2503                 if cookie.domain:
2504                     values.append(f'Domain={cookie.domain}')
2505                 if cookie.path:
2506                     values.append(f'Path={cookie.path}')
2507                 if cookie.secure:
2508                     values.append('Secure')
2509                 if cookie.expires:
2510                     values.append(f'Expires={cookie.expires}')
2511                 if cookie.version:
2512                     values.append(f'Version={cookie.version}')
2513             info_dict['cookies'] = '; '.join(values)
2514
2515         if 'X-Forwarded-For' not in res:
2516             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2517             if x_forwarded_for_ip:
2518                 res['X-Forwarded-For'] = x_forwarded_for_ip
2519
2520         return res
2521
2522     def _calc_cookies(self, url):
2523         self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2524         return self.cookiejar.get_cookie_header(url)
2525
2526     def _sort_thumbnails(self, thumbnails):
2527         thumbnails.sort(key=lambda t: (
2528             t.get('preference') if t.get('preference') is not None else -1,
2529             t.get('width') if t.get('width') is not None else -1,
2530             t.get('height') if t.get('height') is not None else -1,
2531             t.get('id') if t.get('id') is not None else '',
2532             t.get('url')))
2533
2534     def _sanitize_thumbnails(self, info_dict):
2535         thumbnails = info_dict.get('thumbnails')
2536         if thumbnails is None:
2537             thumbnail = info_dict.get('thumbnail')
2538             if thumbnail:
2539                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2540         if not thumbnails:
2541             return
2542
2543         def check_thumbnails(thumbnails):
2544             for t in thumbnails:
2545                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2546                 try:
2547                     self.urlopen(HEADRequest(t['url']))
2548                 except network_exceptions as err:
2549                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2550                     continue
2551                 yield t
2552
2553         self._sort_thumbnails(thumbnails)
2554         for i, t in enumerate(thumbnails):
2555             if t.get('id') is None:
2556                 t['id'] = '%d' % i
2557             if t.get('width') and t.get('height'):
2558                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2559             t['url'] = sanitize_url(t['url'])
2560
2561         if self.params.get('check_formats') is True:
2562             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2563         else:
2564             info_dict['thumbnails'] = thumbnails
2565
2566     def _fill_common_fields(self, info_dict, final=True):
2567         # TODO: move sanitization here
2568         if final:
2569             title = info_dict['fulltitle'] = info_dict.get('title')
2570             if not title:
2571                 if title == '':
2572                     self.write_debug('Extractor gave empty title. Creating a generic title')
2573                 else:
2574                     self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2575                 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2576
2577         if info_dict.get('duration') is not None:
2578             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2579
2580         for ts_key, date_key in (
2581                 ('timestamp', 'upload_date'),
2582                 ('release_timestamp', 'release_date'),
2583                 ('modified_timestamp', 'modified_date'),
2584         ):
2585             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2586                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2587                 # see http://bugs.python.org/issue1646728)
2588                 with contextlib.suppress(ValueError, OverflowError, OSError):
2589                     upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
2590                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2591
2592         if not info_dict.get('release_year'):
2593             info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])}))
2594
2595         live_keys = ('is_live', 'was_live')
2596         live_status = info_dict.get('live_status')
2597         if live_status is None:
2598             for key in live_keys:
2599                 if info_dict.get(key) is False:
2600                     continue
2601                 if info_dict.get(key):
2602                     live_status = key
2603                 break
2604             if all(info_dict.get(key) is False for key in live_keys):
2605                 live_status = 'not_live'
2606         if live_status:
2607             info_dict['live_status'] = live_status
2608             for key in live_keys:
2609                 if info_dict.get(key) is None:
2610                     info_dict[key] = (live_status == key)
2611         if live_status == 'post_live':
2612             info_dict['was_live'] = True
2613
2614         # Auto generate title fields corresponding to the *_number fields when missing
2615         # in order to always have clean titles. This is very common for TV series.
2616         for field in ('chapter', 'season', 'episode'):
2617             if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2618                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2619
2620     def _raise_pending_errors(self, info):
2621         err = info.pop('__pending_error', None)
2622         if err:
2623             self.report_error(err, tb=False)
2624
2625     def sort_formats(self, info_dict):
2626         formats = self._get_formats(info_dict)
2627         formats.sort(key=FormatSorter(
2628             self, info_dict.get('_format_sort_fields') or []).calculate_preference)
2629
    def process_video_result(self, info_dict, download=True):
        """Sanitize the info dict of a single video, select its formats and process them

        `info_dict` is modified in-place: mistyped fields are coerced, chapters,
        thumbnails, subtitles and formats are normalized, and `requested_subtitles`,
        `_has_drm` and `formats` are set. The formats to download are then chosen
        with the format selector (asking interactively when the selector is '-').

        @param download  Passed on to `_default_format_spec`. When true, every
                         selected format/range combination is passed to `process_info`
                         and the "after_video" postprocessors are run
        @returns         The info dict, updated with the last (best) selected format.
                         It is returned early, before any format selection, when
                         `_match_entry` gives a non-None result or when only listing
        @raises ExtractorError      If "id" is missing/empty, or if no format matches
                                    and `ignore_no_formats_error` is not set
        @raises MaxDownloadsReached Re-raised after the remaining bookkeeping is done
        """
        assert info_dict.get('_type', 'video') == 'video'
        self._num_videos += 1

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
        elif not info_dict.get('id'):
            raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

        # The helpers below coerce mistyped fields in-place and warn about each conversion
        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            field = info.get(string_field)
            if field is None or isinstance(field, str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = str(field)

        def sanitize_numeric_fields(info):
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, (int, float)):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)
        # The length of a section replaces the duration
        if info_dict.get('section_end') and info_dict.get('section_start') is not None:
            info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
        # A missing, zero or negative duration is removed;
        # the warning is only shown when the removed value was non-zero
        if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
            self.report_warning('"duration" field is negative, there is an error in extractor')

        chapters = info_dict.get('chapters') or []
        # Make the chapter list start at 0
        if chapters and chapters[0].get('start_time'):
            chapters.insert(0, {'start_time': 0})

        # dummy_chapter acts as the predecessor of the first chapter (end_time 0)
        # and as the successor of the last chapter (start_time = duration)
        dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
        for idx, (prev, current, next_) in enumerate(zip(
                (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
            if current.get('start_time') is None:
                current['start_time'] = prev.get('end_time')
            if not current.get('end_time'):
                current['end_time'] = next_.get('start_time')
            if not current.get('title'):
                current['title'] = f'<Untitled Chapter {idx}>'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # Fall back to the last entry of the (sanitized) thumbnails list
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        self._fill_common_fields(info_dict)

        # Sanitize the URL of every subtitle/caption format and fill in a missing ext
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        formats = self._get_formats(info_dict)

        # Backward compatibility with InfoExtractor._sort_formats
        field_preference = (formats or [{}])[0].pop('__sort_fields', None)
        if field_preference:
            info_dict['_format_sort_fields'] = field_preference

        info_dict['_has_drm'] = any(  # or None ensures --clean-infojson removes it
            f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
        # Unless unplayable formats are allowed, keep only formats without DRM or with has_drm == 'maybe'
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']

        if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
            self.report_warning(
                f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
                'only images are available for download. Use --list-formats to see them'.capitalize())

        # False only for a live video when the live_from_start param is not set
        get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
        if not get_from_start:
            # Append the current time to the title
            info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        if info_dict.get('is_live') and formats:
            formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
            if get_from_start and not formats:
                self.raise_no_formats(info_dict, msg=(
                    '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                    'If you want to download from the current time, use --no-live-from-start'))

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats or []))

        if not formats:
            self.raise_no_formats(info_dict)

        # Fill in the fields that can be derived for each format
        for format in formats:
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            if format.get('aspect_ratio') is None:
                format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
            # For fragmented formats, "tbr" is often max bitrate and not average
            if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url'))
                    and info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
            # The ChainMap makes the format's fields take precedence over those of the info dict
            format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)

        # Safeguard against old/insecure infojson when using --load-info-json
        if info_dict.get('http_headers'):
            info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
            info_dict['http_headers'].pop('Cookie', None)

        # This is copied to http_headers by the above _calc_headers and can now be removed
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        self.sort_formats({
            'formats': formats,
            '_format_sort_fields': info_dict.get('_format_sort_fields')
        })

        # Sanitize and group by format_id
        formats_dict = {}
        for i, format in enumerate(formats):
            if not format.get('format_id'):
                format['format_id'] = str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            formats_dict.setdefault(format['format_id'], []).append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    format['format_id'] = '%s-%d' % (format_id, i)
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

                if format.get('format') is None:
                    format['format'] = '{id} - {res}{note}'.format(
                        id=format['format_id'],
                        res=self.format_resolution(format),
                        note=format_field(format, 'format_note', ' (%s)'),
                    )

        if self.params.get('check_formats') is True:
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # Stop here when _match_entry gives a non-None result
        if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
            return info_dict

        self.post_extract(info_dict)
        info_dict, _ = self.pre_process(info_dict, 'after_filter')

        # The pre-processors may have modified the formats
        formats = self._get_formats(info_dict)

        list_only = self.params.get('simulate') == 'list_only'
        interactive_format_selection = not list_only and self.format_selector == '-'
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        if self.params.get('listformats') or interactive_format_selection:
            self.list_formats(info_dict)
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict)
            return info_dict

        format_selector = self.format_selector
        # Runs once, unless the selection is interactive:
        # then the user is asked again until a selector parses and matches some format
        while True:
            if interactive_format_selection:
                req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
                                   + '(Press ENTER for default, or Ctrl+C to quit)'
                                   + self._format_screen(': ', self.Styles.EMPHASIS))
                try:
                    format_selector = self.build_format_selector(req_format) if req_format else None
                except SyntaxError as err:
                    self.report_error(err, tb=False, is_error=False)
                    continue

            if format_selector is None:
                req_format = self._default_format_spec(info_dict, download=download)
                self.write_debug(f'Default format spec: {req_format}')
                format_selector = self.build_format_selector(req_format)

            formats_to_download = list(format_selector({
                'formats': formats,
                'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
                'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
                                       or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
            }))
            if interactive_format_selection and not formats_to_download:
                self.report_error('Requested format is not available', tb=False, is_error=False)
                continue
            break

        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError(
                    'Requested format is not available. Use --list-formats for a list of available formats',
                    expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
            self.report_warning('Requested format is not available')
            # Process what we can, even without any available formats.
            formats_to_download = [{}]

        # Without the download_ranges param, there is a single empty range (= the whole video)
        requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
        best_format, downloaded_formats = formats_to_download[-1], []
        if download:
            if best_format and requested_ranges:
                def to_screen(*msg):
                    self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

                to_screen(f'Downloading {len(formats_to_download)} format(s):',
                          (f['format_id'] for f in formats_to_download))
                if requested_ranges != ({}, ):
                    to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                              (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
            max_downloads_reached = False

            # Every selected format is processed once for each requested range
            for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
                new_info = self._copy_infodict(info_dict)
                new_info.update(fmt)
                offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
                end_time = offset + min(chapter.get('end_time', duration), duration)
                # duration may not be accurate. So allow deviations <1sec
                if end_time == float('inf') or end_time > offset + duration + 1:
                    end_time = None
                if chapter or offset:
                    new_info.update({
                        'section_start': offset + chapter.get('start_time', 0),
                        'section_end': end_time,
                        'section_title': chapter.get('title'),
                        'section_number': chapter.get('index'),
                    })
                downloaded_formats.append(new_info)
                try:
                    self.process_info(new_info)
                except MaxDownloadsReached:
                    # Remember it, so that the bookkeeping below still runs before re-raising
                    max_downloads_reached = True
                self._raise_pending_errors(new_info)
                # Remove copied info
                for key, val in tuple(new_info.items()):
                    if info_dict.get(key) == val:
                        new_info.pop(key)
                if max_downloads_reached:
                    break

            # Record in the archive only if some download asked for it and none left the flag unset/False
            write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
            assert write_archive.issubset({True, False, 'ignore'})
            if True in write_archive and False not in write_archive:
                self.record_download_archive(info_dict)

            info_dict['requested_downloads'] = downloaded_formats
            info_dict = self.run_all_pps('after_video', info_dict)
            if max_downloads_reached:
                raise MaxDownloadsReached()

        # We update the info dict with the selected best quality format (backwards compatibility)
        info_dict.update(best_format)
        return info_dict
2952
2953     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2954         """Select the requested subtitles and their format"""
2955         available_subs, normal_sub_langs = {}, []
2956         if normal_subtitles and self.params.get('writesubtitles'):
2957             available_subs.update(normal_subtitles)
2958             normal_sub_langs = tuple(normal_subtitles.keys())
2959         if automatic_captions and self.params.get('writeautomaticsub'):
2960             for lang, cap_info in automatic_captions.items():
2961                 if lang not in available_subs:
2962                     available_subs[lang] = cap_info
2963
2964         if not available_subs or (
2965                 not self.params.get('writesubtitles')
2966                 and not self.params.get('writeautomaticsub')):
2967             return None
2968
2969         all_sub_langs = tuple(available_subs.keys())
2970         if self.params.get('allsubtitles', False):
2971             requested_langs = all_sub_langs
2972         elif self.params.get('subtitleslangs', False):
2973             try:
2974                 requested_langs = orderedSet_from_options(
2975                     self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2976             except re.error as e:
2977                 raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}')
2978         else:
2979             requested_langs = LazyList(itertools.chain(
2980                 ['en'] if 'en' in normal_sub_langs else [],
2981                 filter(lambda f: f.startswith('en'), normal_sub_langs),
2982                 ['en'] if 'en' in all_sub_langs else [],
2983                 filter(lambda f: f.startswith('en'), all_sub_langs),
2984                 normal_sub_langs, all_sub_langs,
2985             ))[:1]
2986         if requested_langs:
2987             self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
2988
2989         formats_query = self.params.get('subtitlesformat', 'best')
2990         formats_preference = formats_query.split('/') if formats_query else []
2991         subs = {}
2992         for lang in requested_langs:
2993             formats = available_subs.get(lang)
2994             if formats is None:
2995                 self.report_warning(f'{lang} subtitles not available for {video_id}')
2996                 continue
2997             for ext in formats_preference:
2998                 if ext == 'best':
2999                     f = formats[-1]
3000                     break
3001                 matches = list(filter(lambda f: f['ext'] == ext, formats))
3002                 if matches:
3003                     f = matches[-1]
3004                     break
3005             else:
3006                 f = formats[-1]
3007                 self.report_warning(
3008                     'No subtitle format found matching "%s" for language %s, '
3009                     'using %s' % (formats_query, lang, f['ext']))
3010             subs[lang] = f
3011         return subs
3012
3013     def _forceprint(self, key, info_dict):
3014         if info_dict is None:
3015             return
3016         info_copy = info_dict.copy()
3017         info_copy.setdefault('filename', self.prepare_filename(info_dict))
3018         if info_dict.get('requested_formats') is not None:
3019             # For RTMP URLs, also include the playpath
3020             info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
3021         elif info_dict.get('url'):
3022             info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
3023         info_copy['formats_table'] = self.render_formats_table(info_dict)
3024         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
3025         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
3026         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
3027
3028         def format_tmpl(tmpl):
3029             mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
3030             if not mobj:
3031                 return tmpl
3032
3033             fmt = '%({})s'
3034             if tmpl.startswith('{'):
3035                 tmpl, fmt = f'.{tmpl}', '%({})j'
3036             if tmpl.endswith('='):
3037                 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
3038             return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
3039
3040         for tmpl in self.params['forceprint'].get(key, []):
3041             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
3042
3043         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
3044             filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
3045             tmpl = format_tmpl(tmpl)
3046             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
3047             if self._ensure_dir_exists(filename):
3048                 with open(filename, 'a', encoding='utf-8', newline='') as f:
3049                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
3050
3051         return info_copy
3052
3053     def __forced_printings(self, info_dict, filename=None, incomplete=True):
3054         if (self.params.get('forcejson')
3055                 or self.params['forceprint'].get('video')
3056                 or self.params['print_to_file'].get('video')):
3057             self.post_extract(info_dict)
3058         if filename:
3059             info_dict['filename'] = filename
3060         info_copy = self._forceprint('video', info_dict)
3061
3062         def print_field(field, actual_field=None, optional=False):
3063             if actual_field is None:
3064                 actual_field = field
3065             if self.params.get(f'force{field}') and (
3066                     info_copy.get(field) is not None or (not optional and not incomplete)):
3067                 self.to_stdout(info_copy[actual_field])
3068
3069         print_field('title')
3070         print_field('id')
3071         print_field('url', 'urls')
3072         print_field('thumbnail', optional=True)
3073         print_field('description', optional=True)
3074         print_field('filename')
3075         if self.params.get('forceduration') and info_copy.get('duration') is not None:
3076             self.to_stdout(formatSeconds(info_copy['duration']))
3077         print_field('format')
3078
3079         if self.params.get('forcejson'):
3080             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
3081
3082     def dl(self, name, info, subtitle=False, test=False):
3083         if not info.get('url'):
3084             self.raise_no_formats(info, True)
3085
3086         if test:
3087             verbose = self.params.get('verbose')
3088             params = {
3089                 'test': True,
3090                 'quiet': self.params.get('quiet') or not verbose,
3091                 'verbose': verbose,
3092                 'noprogress': not verbose,
3093                 'nopart': True,
3094                 'skip_unavailable_fragments': False,
3095                 'keep_fragments': False,
3096                 'overwrites': True,
3097                 '_no_ytdl_file': True,
3098             }
3099         else:
3100             params = self.params
3101         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
3102         if not test:
3103             for ph in self._progress_hooks:
3104                 fd.add_progress_hook(ph)
3105             urls = '", "'.join(
3106                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
3107                 for f in info.get('requested_formats', []) or [info])
3108             self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
3109
3110         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
3111         # But it may contain objects that are not deep-copyable
3112         new_info = self._copy_infodict(info)
3113         if new_info.get('http_headers') is None:
3114             new_info['http_headers'] = self._calc_headers(new_info)
3115         return fd.download(name, new_info, subtitle)
3116
3117     def existing_file(self, filepaths, *, default_overwrite=True):
3118         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
3119         if existing_files and not self.params.get('overwrites', default_overwrite):
3120             return existing_files[0]
3121
3122         for file in existing_files:
3123             self.report_file_delete(file)
3124             os.remove(file)
3125         return None
3126
3127     def process_info(self, info_dict):
3128         """Process a single resolved IE result. (Modifies it in-place)"""
3129
3130         assert info_dict.get('_type', 'video') == 'video'
3131         original_infodict = info_dict
3132
3133         if 'format' not in info_dict and 'ext' in info_dict:
3134             info_dict['format'] = info_dict['ext']
3135
3136         if self._match_entry(info_dict) is not None:
3137             info_dict['__write_download_archive'] = 'ignore'
3138             return
3139
3140         # Does nothing under normal operation - for backward compatibility of process_info
3141         self.post_extract(info_dict)
3142
3143         def replace_info_dict(new_info):
3144             nonlocal info_dict
3145             if new_info == info_dict:
3146                 return
3147             info_dict.clear()
3148             info_dict.update(new_info)
3149
3150         new_info, _ = self.pre_process(info_dict, 'video')
3151         replace_info_dict(new_info)
3152         self._num_downloads += 1
3153
3154         # info_dict['_filename'] needs to be set for backward compatibility
3155         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3156         temp_filename = self.prepare_filename(info_dict, 'temp')
3157         files_to_move = {}
3158
3159         # Forced printings
3160         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
3161
3162         def check_max_downloads():
3163             if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3164                 raise MaxDownloadsReached()
3165
3166         if self.params.get('simulate'):
3167             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3168             check_max_downloads()
3169             return
3170
3171         if full_filename is None:
3172             return
3173         if not self._ensure_dir_exists(encodeFilename(full_filename)):
3174             return
3175         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
3176             return
3177
3178         if self._write_description('video', info_dict,
3179                                    self.prepare_filename(info_dict, 'description')) is None:
3180             return
3181
3182         sub_files = self._write_subtitles(info_dict, temp_filename)
3183         if sub_files is None:
3184             return
3185         files_to_move.update(dict(sub_files))
3186
3187         thumb_files = self._write_thumbnails(
3188             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3189         if thumb_files is None:
3190             return
3191         files_to_move.update(dict(thumb_files))
3192
3193         infofn = self.prepare_filename(info_dict, 'infojson')
3194         _infojson_written = self._write_info_json('video', info_dict, infofn)
3195         if _infojson_written:
3196             info_dict['infojson_filename'] = infofn
3197             # For backward compatibility, even though it was a private field
3198             info_dict['__infojson_filename'] = infofn
3199         elif _infojson_written is None:
3200             return
3201
3202         # Note: Annotations are deprecated
3203         annofn = None
3204         if self.params.get('writeannotations', False):
3205             annofn = self.prepare_filename(info_dict, 'annotation')
3206         if annofn:
3207             if not self._ensure_dir_exists(encodeFilename(annofn)):
3208                 return
3209             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
3210                 self.to_screen('[info] Video annotations are already present')
3211             elif not info_dict.get('annotations'):
3212                 self.report_warning('There are no annotations to write.')
3213             else:
3214                 try:
3215                     self.to_screen('[info] Writing video annotations to: ' + annofn)
3216                     with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
3217                         annofile.write(info_dict['annotations'])
3218                 except (KeyError, TypeError):
3219                     self.report_warning('There are no annotations to write.')
3220                 except OSError:
3221                     self.report_error('Cannot write annotations file: ' + annofn)
3222                     return
3223
3224         # Write internet shortcut files
3225         def _write_link_file(link_type):
3226             url = try_get(info_dict['webpage_url'], iri_to_uri)
3227             if not url:
3228                 self.report_warning(
3229                     f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3230                 return True
3231             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3232             if not self._ensure_dir_exists(encodeFilename(linkfn)):
3233                 return False
3234             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3235                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3236                 return True
3237             try:
3238                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3239                 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3240                           newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3241                     template_vars = {'url': url}
3242                     if link_type == 'desktop':
3243                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3244                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3245             except OSError:
3246                 self.report_error(f'Cannot write internet shortcut {linkfn}')
3247                 return False
3248             return True
3249
3250         write_links = {
3251             'url': self.params.get('writeurllink'),
3252             'webloc': self.params.get('writewebloclink'),
3253             'desktop': self.params.get('writedesktoplink'),
3254         }
3255         if self.params.get('writelink'):
3256             link_type = ('webloc' if sys.platform == 'darwin'
3257                          else 'desktop' if sys.platform.startswith('linux')
3258                          else 'url')
3259             write_links[link_type] = True
3260
3261         if any(should_write and not _write_link_file(link_type)
3262                for link_type, should_write in write_links.items()):
3263             return
3264
3265         new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3266         replace_info_dict(new_info)
3267
3268         if self.params.get('skip_download'):
3269             info_dict['filepath'] = temp_filename
3270             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3271             info_dict['__files_to_move'] = files_to_move
3272             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3273             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3274         else:
3275             # Download
3276             info_dict.setdefault('__postprocessors', [])
3277             try:
3278
3279                 def existing_video_file(*filepaths):
3280                     ext = info_dict.get('ext')
3281                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3282                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3283                                               default_overwrite=False)
3284                     if file:
3285                         info_dict['ext'] = os.path.splitext(file)[1][1:]
3286                     return file
3287
3288                 fd, success = None, True
3289                 if info_dict.get('protocol') or info_dict.get('url'):
3290                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3291                     if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3292                             info_dict.get('section_start') or info_dict.get('section_end')):
3293                         msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3294                                else 'You have requested downloading the video partially, but ffmpeg is not installed')
3295                         self.report_error(f'{msg}. Aborting')
3296                         return
3297
3298                 if info_dict.get('requested_formats') is not None:
3299                     old_ext = info_dict['ext']
3300                     if self.params.get('merge_output_format') is None:
3301                         if (info_dict['ext'] == 'webm'
3302                                 and info_dict.get('thumbnails')
3303                                 # check with type instead of pp_key, __name__, or isinstance
3304                                 # since we dont want any custom PPs to trigger this
3305                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
3306                             info_dict['ext'] = 'mkv'
3307                             self.report_warning(
3308                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3309                     new_ext = info_dict['ext']
3310
3311                     def correct_ext(filename, ext=new_ext):
3312                         if filename == '-':
3313                             return filename
3314                         filename_real_ext = os.path.splitext(filename)[1][1:]
3315                         filename_wo_ext = (
3316                             os.path.splitext(filename)[0]
3317                             if filename_real_ext in (old_ext, new_ext)
3318                             else filename)
3319                         return f'{filename_wo_ext}.{ext}'
3320
3321                     # Ensure filename always has a correct extension for successful merge
3322                     full_filename = correct_ext(full_filename)
3323                     temp_filename = correct_ext(temp_filename)
3324                     dl_filename = existing_video_file(full_filename, temp_filename)
3325
3326                     info_dict['__real_download'] = False
3327                     # NOTE: Copy so that original format dicts are not modified
3328                     info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
3329
3330                     merger = FFmpegMergerPP(self)
3331                     downloaded = []
3332                     if dl_filename is not None:
3333                         self.report_file_already_downloaded(dl_filename)
3334                     elif fd:
3335                         for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
3336                             f['filepath'] = fname = prepend_extension(
3337                                 correct_ext(temp_filename, info_dict['ext']),
3338                                 'f%s' % f['format_id'], info_dict['ext'])
3339                             downloaded.append(fname)
3340                         info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
3341                         success, real_download = self.dl(temp_filename, info_dict)
3342                         info_dict['__real_download'] = real_download
3343                     else:
3344                         if self.params.get('allow_unplayable_formats'):
3345                             self.report_warning(
3346                                 'You have requested merging of multiple formats '
3347                                 'while also allowing unplayable formats to be downloaded. '
3348                                 'The formats won\'t be merged to prevent data corruption.')
3349                         elif not merger.available:
3350                             msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3351                             if not self.params.get('ignoreerrors'):
3352                                 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3353                                 return
3354                             self.report_warning(f'{msg}. The formats won\'t be merged')
3355
3356                         if temp_filename == '-':
3357                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3358                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3359                                       else 'but ffmpeg is not installed')
3360                             self.report_warning(
3361                                 f'You have requested downloading multiple formats to stdout {reason}. '
3362                                 'The formats will be streamed one after the other')
3363                             fname = temp_filename
3364                         for f in info_dict['requested_formats']:
3365                             new_info = dict(info_dict)
3366                             del new_info['requested_formats']
3367                             new_info.update(f)
3368                             if temp_filename != '-':
3369                                 fname = prepend_extension(
3370                                     correct_ext(temp_filename, new_info['ext']),
3371                                     'f%s' % f['format_id'], new_info['ext'])
3372                                 if not self._ensure_dir_exists(fname):
3373                                     return
3374                                 f['filepath'] = fname
3375                                 downloaded.append(fname)
3376                             partial_success, real_download = self.dl(fname, new_info)
3377                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3378                             success = success and partial_success
3379
3380                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3381                         info_dict['__postprocessors'].append(merger)
3382                         info_dict['__files_to_merge'] = downloaded
3383                         # Even if there were no downloads, it is being merged only now
3384                         info_dict['__real_download'] = True
3385                     else:
3386                         for file in downloaded:
3387                             files_to_move[file] = None
3388                 else:
3389                     # Just a single file
3390                     dl_filename = existing_video_file(full_filename, temp_filename)
3391                     if dl_filename is None or dl_filename == temp_filename:
3392                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3393                         # So we should try to resume the download
3394                         success, real_download = self.dl(temp_filename, info_dict)
3395                         info_dict['__real_download'] = real_download
3396                     else:
3397                         self.report_file_already_downloaded(dl_filename)
3398
3399                 dl_filename = dl_filename or temp_filename
3400                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3401
3402             except network_exceptions as err:
3403                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3404                 return
3405             except OSError as err:
3406                 raise UnavailableVideoError(err)
3407             except (ContentTooShortError, ) as err:
3408                 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3409                 return
3410
3411             self._raise_pending_errors(info_dict)
3412             if success and full_filename != '-':
3413
3414                 def fixup():
3415                     do_fixup = True
3416                     fixup_policy = self.params.get('fixup')
3417                     vid = info_dict['id']
3418
3419                     if fixup_policy in ('ignore', 'never'):
3420                         return
3421                     elif fixup_policy == 'warn':
3422                         do_fixup = 'warn'
3423                     elif fixup_policy != 'force':
3424                         assert fixup_policy in ('detect_or_warn', None)
3425                         if not info_dict.get('__real_download'):
3426                             do_fixup = False
3427
3428                     def ffmpeg_fixup(cndn, msg, cls):
3429                         if not (do_fixup and cndn):
3430                             return
3431                         elif do_fixup == 'warn':
3432                             self.report_warning(f'{vid}: {msg}')
3433                             return
3434                         pp = cls(self)
3435                         if pp.available:
3436                             info_dict['__postprocessors'].append(pp)
3437                         else:
3438                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3439
3440                     stretched_ratio = info_dict.get('stretched_ratio')
3441                     ffmpeg_fixup(stretched_ratio not in (1, None),
3442                                  f'Non-uniform pixel ratio {stretched_ratio}',
3443                                  FFmpegFixupStretchedPP)
3444
3445                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3446                     downloader = downloader.FD_NAME if downloader else None
3447
3448                     ext = info_dict.get('ext')
3449                     postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3450                         isinstance(pp, FFmpegVideoConvertorPP)
3451                         and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3452                     ) for pp in self._pps['post_process'])
3453
3454                     if not postprocessed_by_ffmpeg:
3455                         ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
3456                                      and info_dict.get('container') == 'm4a_dash',
3457                                      'writing DASH m4a. Only some players support this container',
3458                                      FFmpegFixupM4aPP)
3459                         ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3460                                      or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3461                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3462                                      FFmpegFixupM3u8PP)
3463                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
3464                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3465
3466                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3467                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3468
3469                 fixup()
3470                 try:
3471                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3472                 except PostProcessingError as err:
3473                     self.report_error('Postprocessing: %s' % str(err))
3474                     return
3475                 try:
3476                     for ph in self._post_hooks:
3477                         ph(info_dict['filepath'])
3478                 except Exception as err:
3479                     self.report_error('post hooks: %s' % str(err))
3480                     return
3481                 info_dict['__write_download_archive'] = True
3482
3483         assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
3484         if self.params.get('force_write_download_archive'):
3485             info_dict['__write_download_archive'] = True
3486         check_max_downloads()
3487
3488     def __download_wrapper(self, func):
3489         @functools.wraps(func)
3490         def wrapper(*args, **kwargs):
3491             try:
3492                 res = func(*args, **kwargs)
3493             except UnavailableVideoError as e:
3494                 self.report_error(e)
3495             except DownloadCancelled as e:
3496                 self.to_screen(f'[info] {e}')
3497                 if not self.params.get('break_per_url'):
3498                     raise
3499                 self._num_downloads = 0
3500             else:
3501                 if self.params.get('dump_single_json', False):
3502                     self.post_extract(res)
3503                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3504         return wrapper
3505
3506     def download(self, url_list):
3507         """Download a given list of URLs."""
3508         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3509         outtmpl = self.params['outtmpl']['default']
3510         if (len(url_list) > 1
3511                 and outtmpl != '-'
3512                 and '%' not in outtmpl
3513                 and self.params.get('max_downloads') != 1):
3514             raise SameFileError(outtmpl)
3515
3516         for url in url_list:
3517             self.__download_wrapper(self.extract_info)(
3518                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3519
3520         return self._download_retcode
3521
3522     def download_with_info_file(self, info_filename):
3523         with contextlib.closing(fileinput.FileInput(
3524                 [info_filename], mode='r',
3525                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3526             # FileInput doesn't have a read method, we can't call json.load
3527             infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
3528                      for info in variadic(json.loads('\n'.join(f)))]
3529         for info in infos:
3530             try:
3531                 self.__download_wrapper(self.process_ie_result)(info, download=True)
3532             except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3533                 if not isinstance(e, EntryNotInPlaylist):
3534                     self.to_stderr('\r')
3535                 webpage_url = info.get('webpage_url')
3536                 if webpage_url is None:
3537                     raise
3538                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3539                 self.download([webpage_url])
3540         return self._download_retcode
3541
3542     @staticmethod
3543     def sanitize_info(info_dict, remove_private_keys=False):
3544         ''' Sanitize the infodict for converting to json '''
3545         if info_dict is None:
3546             return info_dict
3547         info_dict.setdefault('epoch', int(time.time()))
3548         info_dict.setdefault('_type', 'video')
3549         info_dict.setdefault('_version', {
3550             'version': __version__,
3551             'current_git_head': current_git_head(),
3552             'release_git_head': RELEASE_GIT_HEAD,
3553             'repository': ORIGIN,
3554         })
3555
3556         if remove_private_keys:
3557             reject = lambda k, v: v is None or k.startswith('__') or k in {
3558                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3559                 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3560                 'playlist_autonumber',
3561             }
3562         else:
3563             reject = lambda k, v: False
3564
3565         def filter_fn(obj):
3566             if isinstance(obj, dict):
3567                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3568             elif isinstance(obj, (list, tuple, set, LazyList)):
3569                 return list(map(filter_fn, obj))
3570             elif obj is None or isinstance(obj, (str, int, float, bool)):
3571                 return obj
3572             else:
3573                 return repr(obj)
3574
3575         return filter_fn(info_dict)
3576
3577     @staticmethod
3578     def filter_requested_info(info_dict, actually_filter=True):
3579         ''' Alias of sanitize_info for backward compatibility '''
3580         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3581
3582     def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3583         for filename in set(filter(None, files_to_delete)):
3584             if msg:
3585                 self.to_screen(msg % filename)
3586             try:
3587                 os.remove(filename)
3588             except OSError:
3589                 self.report_warning(f'Unable to delete file {filename}')
3590             if filename in info.get('__files_to_move', []):  # NB: Delete even if None
3591                 del info['__files_to_move'][filename]
3592
3593     @staticmethod
3594     def post_extract(info_dict):
3595         def actual_post_extract(info_dict):
3596             if info_dict.get('_type') in ('playlist', 'multi_video'):
3597                 for video_dict in info_dict.get('entries', {}):
3598                     actual_post_extract(video_dict or {})
3599                 return
3600
3601             post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3602             info_dict.update(post_extractor())
3603
3604         actual_post_extract(info_dict or {})
3605
3606     def run_pp(self, pp, infodict):
3607         files_to_delete = []
3608         if '__files_to_move' not in infodict:
3609             infodict['__files_to_move'] = {}
3610         try:
3611             files_to_delete, infodict = pp.run(infodict)
3612         except PostProcessingError as e:
3613             # Must be True and not 'only_download'
3614             if self.params.get('ignoreerrors') is True:
3615                 self.report_error(e)
3616                 return infodict
3617             raise
3618
3619         if not files_to_delete:
3620             return infodict
3621         if self.params.get('keepvideo', False):
3622             for f in files_to_delete:
3623                 infodict['__files_to_move'].setdefault(f, '')
3624         else:
3625             self._delete_downloaded_files(
3626                 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3627         return infodict
3628
3629     def run_all_pps(self, key, info, *, additional_pps=None):
3630         if key != 'video':
3631             self._forceprint(key, info)
3632         for pp in (additional_pps or []) + self._pps[key]:
3633             info = self.run_pp(pp, info)
3634         return info
3635
3636     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3637         info = dict(ie_info)
3638         info['__files_to_move'] = files_to_move or {}
3639         try:
3640             info = self.run_all_pps(key, info)
3641         except PostProcessingError as err:
3642             msg = f'Preprocessing: {err}'
3643             info.setdefault('__pending_error', msg)
3644             self.report_error(msg, is_error=False)
3645         return info, info.pop('__files_to_move', None)
3646
3647     def post_process(self, filename, info, files_to_move=None):
3648         """Run all the postprocessors on the given file."""
3649         info['filepath'] = filename
3650         info['__files_to_move'] = files_to_move or {}
3651         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3652         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3653         del info['__files_to_move']
3654         return self.run_all_pps('after_move', info)
3655
3656     def _make_archive_id(self, info_dict):
3657         video_id = info_dict.get('id')
3658         if not video_id:
3659             return
3660         # Future-proof against any change in case
3661         # and backwards compatibility with prior versions
3662         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3663         if extractor is None:
3664             url = str_or_none(info_dict.get('url'))
3665             if not url:
3666                 return
3667             # Try to find matching extractor for the URL and take its ie_key
3668             for ie_key, ie in self._ies.items():
3669                 if ie.suitable(url):
3670                     extractor = ie_key
3671                     break
3672             else:
3673                 return
3674         return make_archive_id(extractor, video_id)
3675
3676     def in_download_archive(self, info_dict):
3677         if not self.archive:
3678             return False
3679
3680         vid_ids = [self._make_archive_id(info_dict)]
3681         vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3682         return any(id_ in self.archive for id_ in vid_ids)
3683
3684     def record_download_archive(self, info_dict):
3685         fn = self.params.get('download_archive')
3686         if fn is None:
3687             return
3688         vid_id = self._make_archive_id(info_dict)
3689         assert vid_id
3690
3691         self.write_debug(f'Adding to archive: {vid_id}')
3692         if is_path_like(fn):
3693             with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3694                 archive_file.write(vid_id + '\n')
3695         self.archive.add(vid_id)
3696
3697     @staticmethod
3698     def format_resolution(format, default='unknown'):
3699         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3700             return 'audio only'
3701         if format.get('resolution') is not None:
3702             return format['resolution']
3703         if format.get('width') and format.get('height'):
3704             return '%dx%d' % (format['width'], format['height'])
3705         elif format.get('height'):
3706             return '%sp' % format['height']
3707         elif format.get('width'):
3708             return '%dx?' % format['width']
3709         return default
3710
3711     def _list_format_headers(self, *headers):
3712         if self.params.get('listformats_table', True) is not False:
3713             return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3714         return headers
3715
3716     def _format_note(self, fdict):
3717         res = ''
3718         if fdict.get('ext') in ['f4f', 'f4m']:
3719             res += '(unsupported)'
3720         if fdict.get('language'):
3721             if res:
3722                 res += ' '
3723             res += '[%s]' % fdict['language']
3724         if fdict.get('format_note') is not None:
3725             if res:
3726                 res += ' '
3727             res += fdict['format_note']
3728         if fdict.get('tbr') is not None:
3729             if res:
3730                 res += ', '
3731             res += '%4dk' % fdict['tbr']
3732         if fdict.get('container') is not None:
3733             if res:
3734                 res += ', '
3735             res += '%s container' % fdict['container']
3736         if (fdict.get('vcodec') is not None
3737                 and fdict.get('vcodec') != 'none'):
3738             if res:
3739                 res += ', '
3740             res += fdict['vcodec']
3741             if fdict.get('vbr') is not None:
3742                 res += '@'
3743         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3744             res += 'video@'
3745         if fdict.get('vbr') is not None:
3746             res += '%4dk' % fdict['vbr']
3747         if fdict.get('fps') is not None:
3748             if res:
3749                 res += ', '
3750             res += '%sfps' % fdict['fps']
3751         if fdict.get('acodec') is not None:
3752             if res:
3753                 res += ', '
3754             if fdict['acodec'] == 'none':
3755                 res += 'video only'
3756             else:
3757                 res += '%-5s' % fdict['acodec']
3758         elif fdict.get('abr') is not None:
3759             if res:
3760                 res += ', '
3761             res += 'audio'
3762         if fdict.get('abr') is not None:
3763             res += '@%3dk' % fdict['abr']
3764         if fdict.get('asr') is not None:
3765             res += ' (%5dHz)' % fdict['asr']
3766         if fdict.get('filesize') is not None:
3767             if res:
3768                 res += ', '
3769             res += format_bytes(fdict['filesize'])
3770         elif fdict.get('filesize_approx') is not None:
3771             if res:
3772                 res += ', '
3773             res += '~' + format_bytes(fdict['filesize_approx'])
3774         return res
3775
3776     def _get_formats(self, info_dict):
3777         if info_dict.get('formats') is None:
3778             if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3779                 return [info_dict]
3780             return []
3781         return info_dict['formats']
3782
    def render_formats_table(self, info_dict):
        """Render the formats of info_dict as a table via render_table

        Returns None when info_dict has no formats. Formats whose 'preference' is
        below -1000 are left out of the table.
        """
        formats = self._get_formats(info_dict)
        if not formats:
            return
        # NB: `not x is not False` parses as `not (x is not False)`, so this branch is taken
        # only when 'listformats_table' is explicitly False. It renders the plain 4-column table
        if not self.params.get('listformats_table', True) is not False:
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)
                ] for f in formats if (f.get('preference') or 0) >= -1000]
            return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)

        def simplified_codec(f, field):
            # Text for the VCODEC/ACODEC column: 'unknown' if the codec is missing/empty,
            # at most the first 4 dot-separated parts of a real codec name, and a
            # placeholder when the codec is 'none'
            assert field in ('acodec', 'vcodec')
            codec = f.get(field)
            if not codec:
                return 'unknown'
            elif codec != 'none':
                return '.'.join(codec.split('.')[:4])

            # From here on the codec is 'none'
            if field == 'vcodec' and f.get('acodec') == 'none':
                # Neither video nor audio
                return 'images'
            elif field == 'acodec' and f.get('vcodec') == 'none':
                # The 'images' label is already shown in the vcodec column
                return ''
            return self._format_out('audio only' if field == 'vcodec' else 'video only',
                                    self.Styles.SUPPRESS)

        # Column separator; '|' is the fallback passed to _format_out alongside test_encoding=True
        delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
        table = [
            [
                self._format_out(format_field(f, 'format_id'), self.Styles.ID),
                format_field(f, 'ext'),
                format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
                format_field(f, 'fps', '\t%d', func=round),
                format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
                format_field(f, 'audio_channels', '\t%s'),
                # File size: exact, else approximate, else estimated from duration and tbr
                delim, (
                    format_field(f, 'filesize', ' \t%s', func=format_bytes)
                    or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
                    or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
                                    None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
                format_field(f, 'tbr', '\t%dk', func=round),
                shorten_protocol_name(f.get('protocol', '')),
                delim,
                simplified_codec(f, 'vcodec'),
                format_field(f, 'vbr', '\t%dk', func=round),
                simplified_codec(f, 'acodec'),
                format_field(f, 'abr', '\t%dk', func=round),
                format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
                # MORE INFO: language, then unsupported/DRM markers, format note and
                # the container (only if it differs from the extension)
                join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
                    self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
                    (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
                     else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
                    format_field(f, 'format_note'),
                    format_field(f, 'container', ignore=(None, f.get('ext'))),
                    delim=', '), delim=' '),
            ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
        # The header cells must line up one-to-one with the row cells built above
        header_line = self._list_format_headers(
            'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
            delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

        return render_table(
            header_line, table, hide_empty=True,
            delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3849
3850     def render_thumbnails_table(self, info_dict):
3851         thumbnails = list(info_dict.get('thumbnails') or [])
3852         if not thumbnails:
3853             return None
3854         return render_table(
3855             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3856             [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3857
3858     def render_subtitles_table(self, video_id, subtitles):
3859         def _row(lang, formats):
3860             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3861             if len(set(names)) == 1:
3862                 names = [] if names[0] == 'unknown' else names[:1]
3863             return [lang, ', '.join(names), ', '.join(exts)]
3864
3865         if not subtitles:
3866             return None
3867         return render_table(
3868             self._list_format_headers('Language', 'Name', 'Formats'),
3869             [_row(lang, formats) for lang, formats in subtitles.items()],
3870             hide_empty=True)
3871
3872     def __list_table(self, video_id, name, func, *args):
3873         table = func(*args)
3874         if not table:
3875             self.to_screen(f'{video_id} has no {name}')
3876             return
3877         self.to_screen(f'[info] Available {name} for {video_id}:')
3878         self.to_stdout(table)
3879
3880     def list_formats(self, info_dict):
3881         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3882
3883     def list_thumbnails(self, info_dict):
3884         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3885
3886     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3887         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3888
    def print_debug_header(self):
        """Write the debug header; does nothing unless the 'verbose' param is set

        The header reports encodings, version/variant, compat options, git HEAD, system
        identifier, external executable versions, optional libraries, proxy map, request
        handlers, loaded plugins and plugin directories.
        """
        if not self.params.get('verbose'):
            return

        from . import _IN_CLI  # Must be delayed import

        # These imports can be slow. So import them only as needed
        from .extractor.extractors import _LAZY_LOADER
        from .extractor.extractors import (
            _PLUGIN_CLASSES as plugin_ies,
            _PLUGIN_OVERRIDES as plugin_ie_overrides
        )

        def get_encoding(stream):
            # Describe the stream's encoding, with notes on missing terminal capabilities
            ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
            additional_info = []
            if os.environ.get('TERM', '').lower() == 'dumb':
                additional_info.append('dumb')
            if not supports_terminal_sequences(stream):
                from .utils import WINDOWS_VT_MODE  # Must be imported locally
                additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
            if additional_info:
                ret = f'{ret} ({",".join(additional_info)})'
            return ret

        # NOTE(review): `items_` is defined outside this chunk; it is iterated here as
        # (key, stream) pairs - confirm against the type of self._out_files
        encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            self.get_encoding(),
            ', '.join(
                f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
                if stream is not None and key != 'console')
        )

        # With a user-supplied logger everything goes to logger.debug; otherwise the
        # encoding line is written with write_string(..., encoding=None) and the rest
        # with self._write_string
        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        # A '*' is appended to the variant when VARIANT is anything other than None or 'pip'
        if VARIANT not in (None, 'pip'):
            source += '*'
        klass = type(self)
        write_debug(join_nonempty(
            f'{REPOSITORY.rpartition("/")[2]} version',
            _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
            f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            # Outside the CLI, show 'API' (or the subclass in use)
            '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
            delim=' '))

        if not _IN_CLI:
            write_debug(f'params: {self.params}')

        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if self.params['compat_opts']:
            write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

        if current_git_head():
            write_debug(f'Git HEAD: {current_git_head()}')
        write_debug(system_identifier())

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        # Keep only the names of the features with a truthy value
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        # Executables with a falsy version are not listed
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .compat.compat_utils import get_package_info
        from .dependencies import available_dependencies

        write_debug('Optional libraries: %s' % (', '.join(sorted({
            join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
        })) or 'none'))

        write_debug(f'Proxy map: {self.proxies}')
        write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
        for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
            # A plugin registered under a name different from its class name is shown as 'Class as name'
            display_list = ['%s%s' % (
                klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in plugins.items()]
            if plugin_type == 'Extractor':
                # Overrides are listed using the last plugin of each overridden parent
                display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                                    for parent, plugins in plugin_ie_overrides.items())
            if not display_list:
                continue
            write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

        plugin_dirs = plugin_directories()
        if plugin_dirs:
            write_debug(f'Plugin directories: {plugin_dirs}')

        # Not implemented
        # (the `False and` guard makes this block unreachable)
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode()
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
4005
4006     @functools.cached_property
4007     def proxies(self):
4008         """Global proxy configuration"""
4009         opts_proxy = self.params.get('proxy')
4010         if opts_proxy is not None:
4011             if opts_proxy == '':
4012                 opts_proxy = '__noproxy__'
4013             proxies = {'all': opts_proxy}
4014         else:
4015             proxies = urllib.request.getproxies()
4016             # compat. Set HTTPS_PROXY to __noproxy__ to revert
4017             if 'http' in proxies and 'https' not in proxies:
4018                 proxies['https'] = proxies['http']
4019
4020         return proxies
4021
4022     @functools.cached_property
4023     def cookiejar(self):
4024         """Global cookiejar instance"""
4025         return load_cookies(
4026             self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
4027
4028     @property
4029     def _opener(self):
4030         """
4031         Get a urllib OpenerDirector from the Urllib handler (deprecated).
4032         """
4033         self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
4034         handler = self._request_director.handlers['Urllib']
4035         return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
4036
    def urlopen(self, req):
        """ Start an HTTP download

        `req` may be a URL string or a Request; a urllib.request.Request is still accepted
        but deprecated. The request is sent through self._request_director and its result
        is returned. Some NoSupportingHandlers and SSLError failures are re-raised as
        RequestError with a more helpful message; HTTPError is re-raised as _CompatHTTPError.
        """
        if isinstance(req, str):
            req = Request(req)
        elif isinstance(req, urllib.request.Request):
            self.deprecation_warning(
                'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
                'Use yt_dlp.networking.common.Request instead.')
            req = urllib_req_to_req(req)
        assert isinstance(req, Request)

        # compat: Assume user:pass url params are basic auth
        url, basic_auth_header = extract_basic_auth(req.url)
        if basic_auth_header:
            req.headers['Authorization'] = basic_auth_header
        req.url = sanitize_url(url)

        clean_proxies(proxies=req.proxies, headers=req.headers)
        clean_headers(req.headers)

        try:
            return self._request_director.send(req)
        except NoSupportingHandlers as e:
            # Translate the first recognized "unsupported" reason into an actionable error
            for ue in e.unsupported_errors:
                # FIXME: This depends on the order of errors.
                if not (ue.handler and ue.msg):
                    continue
                if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
                    raise RequestError(
                        'file:// URLs are disabled by default in yt-dlp for security reasons. '
                        'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
                # NOTE(review): unlike its siblings, this raise passes neither `cause=ue` nor
                # `from ue` - confirm whether that is intentional
                if 'unsupported proxy type: "https"' in ue.msg.lower():
                    raise RequestError(
                        'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests')

                elif (
                    re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
                    and 'websockets' not in self._request_director.handlers
                ):
                    raise RequestError(
                        'This request requires WebSocket support. '
                        'Ensure one of the following dependencies are installed: websockets',
                        cause=ue) from ue
            # Nothing matched: propagate the original NoSupportingHandlers
            raise
        except SSLError as e:
            if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
                raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
            elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
                raise RequestError(
                    'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
                    'Try using --legacy-server-connect', cause=e) from e
            raise
        except HTTPError as e:  # TODO: Remove in a future release
            raise _CompatHTTPError(e) from e
4091
4092     def build_request_director(self, handlers, preferences=None):
4093         logger = _YDLLogger(self)
4094         headers = self.params['http_headers'].copy()
4095         proxies = self.proxies.copy()
4096         clean_headers(headers)
4097         clean_proxies(proxies, headers)
4098
4099         director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
4100         for handler in handlers:
4101             director.add_handler(handler(
4102                 logger=logger,
4103                 headers=headers,
4104                 cookiejar=self.cookiejar,
4105                 proxies=proxies,
4106                 prefer_system_certs='no-certifi' in self.params['compat_opts'],
4107                 verify=not self.params.get('nocheckcertificate'),
4108                 **traverse_obj(self.params, {
4109                     'verbose': 'debug_printtraffic',
4110                     'source_address': 'source_address',
4111                     'timeout': 'socket_timeout',
4112                     'legacy_ssl_support': 'legacyserverconnect',
4113                     'enable_file_urls': 'enable_file_urls',
4114                     'client_cert': {
4115                         'client_certificate': 'client_certificate',
4116                         'client_certificate_key': 'client_certificate_key',
4117                         'client_certificate_password': 'client_certificate_password',
4118                     },
4119                 }),
4120             ))
4121         director.preferences.update(preferences or [])
4122         if 'prefer-legacy-http-handler' in self.params['compat_opts']:
4123             director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
4124         return director
4125
4126     def encode(self, s):
4127         if isinstance(s, bytes):
4128             return s  # Already encoded
4129
4130         try:
4131             return s.encode(self.get_encoding())
4132         except UnicodeEncodeError as err:
4133             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
4134             raise
4135
4136     def get_encoding(self):
4137         encoding = self.params.get('encoding')
4138         if encoding is None:
4139             encoding = preferredencoding()
4140         return encoding
4141
4142     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
4143         ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
4144         if overwrite is None:
4145             overwrite = self.params.get('overwrites', True)
4146         if not self.params.get('writeinfojson'):
4147             return False
4148         elif not infofn:
4149             self.write_debug(f'Skipping writing {label} infojson')
4150             return False
4151         elif not self._ensure_dir_exists(infofn):
4152             return None
4153         elif not overwrite and os.path.exists(infofn):
4154             self.to_screen(f'[info] {label.title()} metadata is already present')
4155             return 'exists'
4156
4157         self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
4158         try:
4159             write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
4160             return True
4161         except OSError:
4162             self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
4163             return None
4164
4165     def _write_description(self, label, ie_result, descfn):
4166         ''' Write description and returns True = written, False = skip, None = error '''
4167         if not self.params.get('writedescription'):
4168             return False
4169         elif not descfn:
4170             self.write_debug(f'Skipping writing {label} description')
4171             return False
4172         elif not self._ensure_dir_exists(descfn):
4173             return None
4174         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
4175             self.to_screen(f'[info] {label.title()} description is already present')
4176         elif ie_result.get('description') is None:
4177             self.to_screen(f'[info] There\'s no {label} description to write')
4178             return False
4179         else:
4180             try:
4181                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
4182                 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
4183                     descfile.write(ie_result['description'])
4184             except OSError:
4185                 self.report_error(f'Cannot write {label} description file {descfn}')
4186                 return None
4187         return True
4188
    def _write_subtitles(self, info_dict, filename):
        ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error

        The subtitles are taken from info_dict['requested_subtitles']. An empty list is
        returned when subtitle writing is not enabled, nothing was requested, or no
        subtitle filename could be prepared. A failed download raises DownloadError unless
        the 'ignoreerrors' param is True, in which case only a warning is reported. '''
        ret = []
        subtitles = info_dict.get('requested_subtitles')
        if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            return ret
        elif not subtitles:
            self.to_screen('[info] There are no subtitles for the requested languages')
            return ret
        sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
        if not sub_filename_base:
            self.to_screen('[info] Skipping writing video subtitles')
            return ret

        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            # sub_filename is derived from `filename` and is where the subtitle is written;
            # sub_filename_final is derived from the prepared 'subtitle' filename
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
            existing_sub = self.existing_file((sub_filename_final, sub_filename))
            if existing_sub:
                # Reuse the file found by existing_file instead of writing it again
                self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
                sub_info['filepath'] = existing_sub
                ret.append((existing_sub, sub_filename_final))
                continue

            self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
            # Subtitle content that is already available in 'data' is written directly
            if sub_info.get('data') is not None:
                try:
                    # Use newline='' to prevent conversion of newline characters
                    # See https://github.com/ytdl-org/youtube-dl/issues/10268
                    with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                        subfile.write(sub_info['data'])
                    sub_info['filepath'] = sub_filename
                    ret.append((sub_filename, sub_filename_final))
                    continue
                except OSError:
                    # A write failure aborts the whole operation
                    self.report_error(f'Cannot write video subtitles file {sub_filename}')
                    return None

            # Otherwise the subtitle is downloaded
            try:
                # Download with a copy so that the http_headers fallback does not end up in sub_info
                sub_copy = sub_info.copy()
                sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
                self.dl(sub_filename, sub_copy, subtitle=True)
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
                if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                    # The error is reported only when 'ignoreerrors' is falsy, but raised in both cases
                    if not self.params.get('ignoreerrors'):
                        self.report_error(msg)
                    raise DownloadError(msg)
                self.report_warning(msg)
        return ret
4244
4245     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
4246         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error '''
4247         write_all = self.params.get('write_all_thumbnails', False)
4248         thumbnails, ret = [], []
4249         if write_all or self.params.get('writethumbnail', False):
4250             thumbnails = info_dict.get('thumbnails') or []
4251             if not thumbnails:
4252                 self.to_screen(f'[info] There are no {label} thumbnails to download')
4253                 return ret
4254         multiple = write_all and len(thumbnails) > 1
4255
4256         if thumb_filename_base is None:
4257             thumb_filename_base = filename
4258         if thumbnails and not thumb_filename_base:
4259             self.write_debug(f'Skipping writing {label} thumbnail')
4260             return ret
4261
4262         if thumbnails and not self._ensure_dir_exists(filename):
4263             return None
4264
4265         for idx, t in list(enumerate(thumbnails))[::-1]:
4266             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
4267             thumb_display_id = f'{label} thumbnail {t["id"]}'
4268             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4269             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
4270
4271             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4272             if existing_thumb:
4273                 self.to_screen('[info] %s is already present' % (
4274                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
4275                 t['filepath'] = existing_thumb
4276                 ret.append((existing_thumb, thumb_filename_final))
4277             else:
4278                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
4279                 try:
4280                     uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
4281                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
4282                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
4283                         shutil.copyfileobj(uf, thumbf)
4284                     ret.append((thumb_filename, thumb_filename_final))
4285                     t['filepath'] = thumb_filename
4286                 except network_exceptions as err:
4287                     if isinstance(err, HTTPError) and err.status == 404:
4288                         self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
4289                     else:
4290                         self.report_warning(f'Unable to download {thumb_display_id}: {err}')
4291                     thumbnails.pop(idx)
4292             if ret and not write_all:
4293                 break
4294         return ret