yt_dlp/YoutubeDL.py

   1 import collections
   2 import contextlib
   3 import copy
   4 import datetime as dt
   5 import errno
   6 import fileinput
   7 import http.cookiejar
   8 import io
   9 import itertools
  10 import json
  11 import locale
  12 import operator
  13 import os
  14 import random
  15 import re
  16 import shutil
  17 import string
  18 import subprocess
  19 import sys
  20 import tempfile
  21 import time
  22 import tokenize
  23 import traceback
  24 import unicodedata
  25
  26 from .cache import Cache
  27 from .compat import functools, urllib  # isort: split
  28 from .compat import compat_os_name, urllib_req_to_req
  29 from .cookies import LenientSimpleCookie, load_cookies
  30 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
  31 from .downloader.rtmp import rtmpdump_version
  32 from .extractor import gen_extractor_classes, get_info_extractor
  33 from .extractor.common import UnsupportedURLIE
  34 from .extractor.openload import PhantomJSwrapper
  35 from .minicurses import format_text
  36 from .networking import HEADRequest, Request, RequestDirector
  37 from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
  38 from .networking.exceptions import (
  39     HTTPError,
  40     NoSupportingHandlers,
  41     RequestError,
  42     SSLError,
  43     network_exceptions,
  44 )
  45 from .networking.impersonate import ImpersonateRequestHandler
  46 from .plugins import directories as plugin_directories
  47 from .postprocessor import _PLUGIN_CLASSES as plugin_pps
  48 from .postprocessor import (
  49     EmbedThumbnailPP,
  50     FFmpegFixupDuplicateMoovPP,
  51     FFmpegFixupDurationPP,
  52     FFmpegFixupM3u8PP,
  53     FFmpegFixupM4aPP,
  54     FFmpegFixupStretchedPP,
  55     FFmpegFixupTimestampPP,
  56     FFmpegMergerPP,
  57     FFmpegPostProcessor,
  58     FFmpegVideoConvertorPP,
  59     MoveFilesAfterDownloadPP,
  60     get_postprocessor,
  61 )
  62 from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
  63 from .update import (
  64     REPOSITORY,
  65     _get_system_deprecation,
  66     _make_label,
  67     current_git_head,
  68     detect_variant,
  69 )
  70 from .utils import (
  71     DEFAULT_OUTTMPL,
  72     IDENTITY,
  73     LINK_TEMPLATES,
  74     MEDIA_EXTENSIONS,
  75     NO_DEFAULT,
  76     NUMBER_RE,
  77     OUTTMPL_TYPES,
  78     POSTPROCESS_WHEN,
  79     STR_FORMAT_RE_TMPL,
  80     STR_FORMAT_TYPES,
  81     ContentTooShortError,
  82     DateRange,
  83     DownloadCancelled,
  84     DownloadError,
  85     EntryNotInPlaylist,
  86     ExistingVideoReached,
  87     ExtractorError,
  88     FormatSorter,
  89     GeoRestrictedError,
  90     ISO3166Utils,
  91     LazyList,
  92     MaxDownloadsReached,
  93     Namespace,
  94     PagedList,
  95     PlaylistEntries,
  96     Popen,
  97     PostProcessingError,
  98     ReExtractInfo,
  99     RejectedVideoReached,
 100     SameFileError,
 101     UnavailableVideoError,
 102     UserNotLive,
 103     YoutubeDLError,
 104     age_restricted,
 105     bug_reports_message,
 106     date_from_str,
 107     deprecation_warning,
 108     determine_ext,
 109     determine_protocol,
 110     encode_compat_str,
 111     encodeFilename,
 112     escapeHTML,
 113     expand_path,
 114     extract_basic_auth,
 115     filter_dict,
 116     float_or_none,
 117     format_bytes,
 118     format_decimal_suffix,
 119     format_field,
 120     formatSeconds,
 121     get_compatible_ext,
 122     get_domain,
 123     int_or_none,
 124     iri_to_uri,
 125     is_path_like,
 126     join_nonempty,
 127     locked_file,
 128     make_archive_id,
 129     make_dir,
 130     number_of_digits,
 131     orderedSet,
 132     orderedSet_from_options,
 133     parse_filesize,
 134     preferredencoding,
 135     prepend_extension,
 136     remove_terminal_sequences,
 137     render_table,
 138     replace_extension,
 139     sanitize_filename,
 140     sanitize_path,
 141     sanitize_url,
 142     shell_quote,
 143     str_or_none,
 144     strftime_or_none,
 145     subtitles_filename,
 146     supports_terminal_sequences,
 147     system_identifier,
 148     filesize_from_tbr,
 149     timetuple_from_msec,
 150     to_high_limit_path,
 151     traverse_obj,
 152     try_call,
 153     try_get,
 154     url_basename,
 155     variadic,
 156     version_tuple,
 157     windows_enable_vt_mode,
 158     write_json_file,
 159     write_string,
 160 )
 161 from .utils._utils import _YDLLogger
 162 from .utils.networking import (
 163     HTTPHeaderDict,
 164     clean_headers,
 165     clean_proxies,
 166     std_headers,
 167 )
 168 from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__
 169
 170 if compat_os_name == 'nt':
 171     import ctypes
 172
 173
 174 class YoutubeDL:
 175     """YoutubeDL class.
 176
 177     YoutubeDL objects are the ones responsible of downloading the
 178     actual video file and writing it to disk if the user has requested
 179     it, among some other tasks. In most cases there should be one per
 180     program. As, given a video URL, the downloader doesn't know how to
 181     extract all the needed information, task that InfoExtractors do, it
 182     has to pass the URL to one of them.
 183
 184     For this, YoutubeDL objects have a method that allows
 185     InfoExtractors to be registered in a given order. When it is passed
 186     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 187     finds that reports being able to handle it. The InfoExtractor extracts
 188     all the information about the video or videos the URL refers to, and
 189     YoutubeDL process the extracted information, possibly using a File
 190     Downloader to download the video.
 191
 192     YoutubeDL objects accept a lot of parameters. In order not to saturate
 193     the object constructor with arguments, it receives a dictionary of
 194     options instead. These options are available through the params
 195     attribute for the InfoExtractors to use. The YoutubeDL also
 196     registers itself as the downloader in charge for the InfoExtractors
 197     that are added to it, so this is a "mutual registration".
 198
 199     Available options:
 200
 201     username:          Username for authentication purposes.
 202     password:          Password for authentication purposes.
 203     videopassword:     Password for accessing a video.
 204     ap_mso:            Adobe Pass multiple-system operator identifier.
 205     ap_username:       Multiple-system operator account username.
 206     ap_password:       Multiple-system operator account password.
 207     usenetrc:          Use netrc for authentication instead.
 208     netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
 209     netrc_cmd:         Use a shell command to get credentials
 210     verbose:           Print additional info to stdout.
 211     quiet:             Do not print messages to stdout.
 212     no_warnings:       Do not print out anything for warnings.
 213     forceprint:        A dict with keys WHEN mapped to a list of templates to
 214                        print to stdout. The allowed keys are video or any of the
 215                        items in utils.POSTPROCESS_WHEN.
 216                        For compatibility, a single list is also accepted
 217     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 218                        a list of tuples with (template, filename)
 219     forcejson:         Force printing info_dict as JSON.
 220     dump_single_json:  Force printing the info_dict of the whole playlist
 221                        (or video) as a single JSON line.
 222     force_write_download_archive: Force writing download archive regardless
 223                        of 'skip_download' or 'simulate'.
 224     simulate:          Do not download the video files. If unset (or None),
 225                        simulate only if listsubtitles, listformats or list_thumbnails is used
 226     format:            Video format code. see "FORMAT SELECTION" for more details.
 227                        You can also pass a function. The function takes 'ctx' as
 228                        argument and returns the formats to download.
 229                        See "build_format_selector" for an implementation
 230     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 231     ignore_no_formats_error: Ignore "No video formats" error. Usefull for
 232                        extracting metadata even if the video is not actually
 233                        available for download (experimental)
 234     format_sort:       A list of fields by which to sort the video formats.
 235                        See "Sorting Formats" for more details.
 236     format_sort_force: Force the given format_sort. see "Sorting Formats"
 237                        for more details.
 238     prefer_free_formats: Whether to prefer video formats with free containers
 239                        over non-free ones of same quality.
 240     allow_multiple_video_streams:   Allow multiple video streams to be merged
 241                        into a single file
 242     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 243                        into a single file
 244     check_formats      Whether to test if the formats are downloadable.
 245                        Can be True (check all), False (check none),
 246                        'selected' (check selected formats),
 247                        or None (check only if requested by extractor)
 248     paths:             Dictionary of output paths. The allowed keys are 'home'
 249                        'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
 250     outtmpl:           Dictionary of templates for output names. Allowed keys
 251                        are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
 252                        For compatibility with youtube-dl, a single string can also be used
 253     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 254     restrictfilenames: Do not allow "&" and spaces in file names
 255     trim_file_name:    Limit length of filename (extension excluded)
 256     windowsfilenames:  Force the filenames to be windows compatible
 257     ignoreerrors:      Do not stop on download/postprocessing errors.
 258                        Can be 'only_download' to ignore only download errors.
 259                        Default is 'only_download' for CLI, but False for API
 260     skip_playlist_after_errors: Number of allowed failures until the rest of
 261                        the playlist is skipped
 262     allowed_extractors:  List of regexes to match against extractor names that are allowed
 263     overwrites:        Overwrite all video and metadata files if True,
 264                        overwrite only non-video files if None
 265                        and don't overwrite any file if False
 266     playlist_items:    Specific indices of playlist to download.
 267     playlistrandom:    Download playlist items in random order.
 268     lazy_playlist:     Process playlist entries as they are received.
 269     matchtitle:        Download only matching titles.
 270     rejecttitle:       Reject downloads for matching titles.
 271     logger:            Log messages to a logging.Logger instance.
 272     logtostderr:       Print everything to stderr instead of stdout.
 273     consoletitle:      Display progress in console window's titlebar.
 274     writedescription:  Write the video description to a .description file
 275     writeinfojson:     Write the video description to a .info.json file
 276     clean_infojson:    Remove internal metadata from the infojson
 277     getcomments:       Extract video comments. This will not be written to disk
 278                        unless writeinfojson is also given
 279     writeannotations:  Write the video annotations to a .annotations.xml file
 280     writethumbnail:    Write the thumbnail image to a file
 281     allow_playlist_files: Whether to write playlists' description, infojson etc
 282                        also to disk when using the 'write*' options
 283     write_all_thumbnails:  Write all thumbnail formats to files
 284     writelink:         Write an internet shortcut file, depending on the
 285                        current platform (.url/.webloc/.desktop)
 286     writeurllink:      Write a Windows internet shortcut file (.url)
 287     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 288     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 289     writesubtitles:    Write the video subtitles to a file
 290     writeautomaticsub: Write the automatically generated subtitles to a file
 291     listsubtitles:     Lists all available subtitles for the video
 292     subtitlesformat:   The format code for subtitles
 293     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 294                        The list may contain "all" to refer to all the available
 295                        subtitles. The language can be prefixed with a "-" to
 296                        exclude it from the requested languages, e.g. ['all', '-live_chat']
 297     keepvideo:         Keep the video file after post-processing
 298     daterange:         A utils.DateRange object, download only if the upload_date is in the range.
 299     skip_download:     Skip the actual download of the video file
 300     cachedir:          Location of the cache files in the filesystem.
 301                        False to disable filesystem cache.
 302     noplaylist:        Download single video instead of a playlist if in doubt.
 303     age_limit:         An integer representing the user's age in years.
 304                        Unsuitable videos for the given age are skipped.
 305     min_views:         An integer representing the minimum view count the video
 306                        must have in order to not be skipped.
 307                        Videos without view count information are always
 308                        downloaded. None for no limit.
 309     max_views:         An integer representing the maximum view count.
 310                        Videos that are more popular than that are not
 311                        downloaded.
 312                        Videos without view count information are always
 313                        downloaded. None for no limit.
 314     download_archive:  A set, or the name of a file where all downloads are recorded.
 315                        Videos already present in the file are not downloaded again.
 316     break_on_existing: Stop the download process after attempting to download a
 317                        file that is in the archive.
 318     break_per_url:     Whether break_on_reject and break_on_existing
 319                        should act on each input URL as opposed to for the entire queue
 320     cookiefile:        File name or text stream from where cookies should be read and dumped to
 321     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 322                        name/path from where cookies are loaded, the name of the keyring,
 323                        and the container name, e.g. ('chrome', ) or
 324                        ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
 325     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 326                        support RFC 5746 secure renegotiation
 327     nocheckcertificate:  Do not verify SSL certificates
 328     client_certificate:  Path to client certificate file in PEM format. May include the private key
 329     client_certificate_key:  Path to private key file for client certificate
 330     client_certificate_password:  Password for client certificate private key, if encrypted.
 331                         If not provided and the key is encrypted, yt-dlp will ask interactively
 332     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 333                        (Only supported by some extractors)
 334     enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
 335     http_headers:      A dictionary of custom headers to be used for all requests
 336     proxy:             URL of the proxy server to use
 337     geo_verification_proxy:  URL of the proxy to use for IP address verification
 338                        on geo-restricted sites.
 339     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 340     bidi_workaround:   Work around buggy terminals without bidirectional text
 341                        support, using fridibi
 342     debug_printtraffic:Print out sent and received HTTP traffic
 343     default_search:    Prepend this string if an input url is not valid.
 344                        'auto' for elaborate guessing
 345     encoding:          Use this encoding instead of the system-specified.
 346     extract_flat:      Whether to resolve and process url_results further
 347                        * False:     Always process. Default for API
 348                        * True:      Never process
 349                        * 'in_playlist': Do not process inside playlist/multi_video
 350                        * 'discard': Always process, but don't return the result
 351                                     from inside playlist/multi_video
 352                        * 'discard_in_playlist': Same as "discard", but only for
 353                                     playlists (not multi_video). Default for CLI
 354     wait_for_video:    If given, wait for scheduled streams to become available.
 355                        The value should be a tuple containing the range
 356                        (min_secs, max_secs) to wait between retries
 357     postprocessors:    A list of dictionaries, each with an entry
 358                        * key:  The name of the postprocessor. See
 359                                yt_dlp/postprocessor/__init__.py for a list.
 360                        * when: When to run the postprocessor. Allowed values are
 361                                the entries of utils.POSTPROCESS_WHEN
 362                                Assumed to be 'post_process' if not given
 363     progress_hooks:    A list of functions that get called on download
 364                        progress, with a dictionary with the entries
 365                        * status: One of "downloading", "error", or "finished".
 366                                  Check this first and ignore unknown values.
 367                        * info_dict: The extracted info_dict
 368
 369                        If status is one of "downloading", or "finished", the
 370                        following properties may also be present:
 371                        * filename: The final filename (always present)
 372                        * tmpfilename: The filename we're currently writing to
 373                        * downloaded_bytes: Bytes on disk
 374                        * total_bytes: Size of the whole file, None if unknown
 375                        * total_bytes_estimate: Guess of the eventual file size,
 376                                                None if unavailable.
 377                        * elapsed: The number of seconds since download started.
 378                        * eta: The estimated time in seconds, None if unknown
 379                        * speed: The download speed in bytes/second, None if
 380                                 unknown
 381                        * fragment_index: The counter of the currently
 382                                          downloaded video fragment.
 383                        * fragment_count: The number of fragments (= individual
 384                                          files that will be merged)
 385
 386                        Progress hooks are guaranteed to be called at least once
 387                        (with status "finished") if the download is successful.
 388     postprocessor_hooks:  A list of functions that get called on postprocessing
 389                        progress, with a dictionary with the entries
 390                        * status: One of "started", "processing", or "finished".
 391                                  Check this first and ignore unknown values.
 392                        * postprocessor: Name of the postprocessor
 393                        * info_dict: The extracted info_dict
 394
 395                        Progress hooks are guaranteed to be called at least twice
 396                        (with status "started" and "finished") if the processing is successful.
 397     merge_output_format: "/" separated list of extensions to use when merging formats.
 398     final_ext:         Expected final extension; used to detect when the file was
 399                        already downloaded and converted
 400     fixup:             Automatically correct known faults of the file.
 401                        One of:
 402                        - "never": do nothing
 403                        - "warn": only emit a warning
 404                        - "detect_or_warn": check whether we can do anything
 405                                            about it, warn otherwise (default)
 406     source_address:    Client-side IP address to bind to.
 407     impersonate:       Client to impersonate for requests.
 408                        An ImpersonateTarget (from yt_dlp.networking.impersonate)
 409     sleep_interval_requests: Number of seconds to sleep between requests
 410                        during extraction
 411     sleep_interval:    Number of seconds to sleep before each download when
 412                        used alone or a lower bound of a range for randomized
 413                        sleep before each download (minimum possible number
 414                        of seconds to sleep) when used along with
 415                        max_sleep_interval.
 416     max_sleep_interval:Upper bound of a range for randomized sleep before each
 417                        download (maximum possible number of seconds to sleep).
 418                        Must only be used along with sleep_interval.
 419                        Actual sleep time will be a random float from range
 420                        [sleep_interval; max_sleep_interval].
 421     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 422     listformats:       Print an overview of available video formats and exit.
 423     list_thumbnails:   Print a table of all thumbnails and exit.
 424     match_filter:      A function that gets called for every video with the signature
 425                        (info_dict, *, incomplete: bool) -> Optional[str]
 426                        For backward compatibility with youtube-dl, the signature
 427                        (info_dict) -> Optional[str] is also allowed.
 428                        - If it returns a message, the video is ignored.
 429                        - If it returns None, the video is downloaded.
 430                        - If it returns utils.NO_DEFAULT, the user is interactively
 431                          asked whether to download the video.
 432                        - Raise utils.DownloadCancelled(msg) to abort remaining
 433                          downloads when a video is rejected.
 434                        match_filter_func in utils/_utils.py is one example for this.
 435     color:             A Dictionary with output stream names as keys
 436                        and their respective color policy as values.
 437                        Can also just be a single color policy,
 438                        in which case it applies to all outputs.
 439                        Valid stream names are 'stdout' and 'stderr'.
 440                        Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
 441     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 442                        HTTP header
 443     geo_bypass_country:
 444                        Two-letter ISO 3166-2 country code that will be used for
 445                        explicit geographic restriction bypassing via faking
 446                        X-Forwarded-For HTTP header
 447     geo_bypass_ip_block:
 448                        IP range in CIDR notation that will be used similarly to
 449                        geo_bypass_country
 450     external_downloader: A dictionary of protocol keys and the executable of the
 451                        external downloader to use for it. The allowed protocols
 452                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 453                        Set the value to 'native' to use the native downloader
 454     compat_opts:       Compatibility options. See "Differences in default behavior".
 455                        The following options do not work when used through the API:
 456                        filename, abort-on-error, multistreams, no-live-chat, format-sort
 457                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 458                        Refer __init__.py for their implementation
 459     progress_template: Dictionary of templates for progress outputs.
 460                        Allowed keys are 'download', 'postprocess',
 461                        'download-title' (console title) and 'postprocess-title'.
 462                        The template is mapped on a dictionary with keys 'progress' and 'info'
 463     retry_sleep_functions: Dictionary of functions that takes the number of attempts
 464                        as argument and returns the time to sleep in seconds.
 465                        Allowed keys are 'http', 'fragment', 'file_access'
 466     download_ranges:   A callback function that gets called for every video with
 467                        the signature (info_dict, ydl) -> Iterable[Section].
 468                        Only the returned sections will be downloaded.
 469                        Each Section is a dict with the following keys:
 470                        * start_time: Start time of the section in seconds
 471                        * end_time: End time of the section in seconds
 472                        * title: Section title (Optional)
 473                        * index: Section number (Optional)
 474     force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
 475     noprogress:        Do not print the progress bar
 476     live_from_start:   Whether to download livestreams videos from the start
 477
 478     The following parameters are not used by YoutubeDL itself, they are used by
 479     the downloader (see yt_dlp/downloader/common.py):
 480     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 481     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 482     continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 483     external_downloader_args, concurrent_fragment_downloads, progress_delta.
 484
 485     The following options are used by the post processors:
 486     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 487                        to the binary or its containing directory.
 488     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 489                        and a list of additional command-line arguments for the
 490                        postprocessor/executable. The dict can also have "PP+EXE" keys
 491                        which are used when the given exe is used by the given PP.
 492                        Use 'default' as the name for arguments to passed to all PP
 493                        For compatibility with youtube-dl, a single list of args
 494                        can also be used
 495
 496     The following options are used by the extractors:
 497     extractor_retries: Number of times to retry for known errors (default: 3)
 498     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 499     hls_split_discontinuity: Split HLS playlists to different formats at
 500                        discontinuities such as ad breaks (default: False)
 501     extractor_args:    A dictionary of arguments to be passed to the extractors.
 502                        See "EXTRACTOR ARGUMENTS" for details.
 503                        E.g. {'youtube': {'skip': ['dash', 'hls']}}
 504     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 505
 506     The following options are deprecated and may be removed in the future:
 507
 508     break_on_reject:   Stop the download process when encountering a video that
 509                        has been filtered out.
 510                        - `raise DownloadCancelled(msg)` in match_filter instead
 511     force_generic_extractor: Force downloader to use the generic extractor
 512                        - Use allowed_extractors = ['generic', 'default']
 513     playliststart:     - Use playlist_items
 514                        Playlist item to start at.
 515     playlistend:       - Use playlist_items
 516                        Playlist item to end at.
 517     playlistreverse:   - Use playlist_items
 518                        Download playlist items in reverse order.
 519     forceurl:          - Use forceprint
 520                        Force printing final URL.
 521     forcetitle:        - Use forceprint
 522                        Force printing title.
 523     forceid:           - Use forceprint
 524                        Force printing ID.
 525     forcethumbnail:    - Use forceprint
 526                        Force printing thumbnail URL.
 527     forcedescription:  - Use forceprint
 528                        Force printing description.
 529     forcefilename:     - Use forceprint
 530                        Force printing final filename.
 531     forceduration:     - Use forceprint
 532                        Force printing duration.
 533     allsubtitles:      - Use subtitleslangs = ['all']
 534                        Downloads all the subtitles of the video
 535                        (requires writesubtitles or writeautomaticsub)
 536     include_ads:       - Doesn't work
 537                        Download ads as well
 538     call_home:         - Not implemented
 539                        Boolean, true iff we are allowed to contact the
 540                        yt-dlp servers for debugging.
 541     post_hooks:        - Register a custom postprocessor
 542                        A list of functions that get called as the final step
 543                        for each video file, after all postprocessors have been
 544                        called. The filename will be passed as the only argument.
 545     hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
 546                        Use the native HLS downloader instead of ffmpeg/avconv
 547                        if True, otherwise use ffmpeg/avconv if False, otherwise
 548                        use downloader suggested by extractor if None.
 549     prefer_ffmpeg:     - avconv support is deprecated
 550                        If False, use avconv instead of ffmpeg if both are available,
 551                        otherwise prefer ffmpeg.
 552     youtube_include_dash_manifest: - Use extractor_args
 553                        If True (default), DASH manifests and related
 554                        data will be downloaded and processed by extractor.
 555                        You can reduce network I/O by disabling it if you don't
 556                        care about DASH. (only for youtube)
 557     youtube_include_hls_manifest: - Use extractor_args
 558                        If True (default), HLS manifests and related
 559                        data will be downloaded and processed by extractor.
 560                        You can reduce network I/O by disabling it if you don't
 561                        care about HLS. (only for youtube)
 562     no_color:          Same as `color='no_color'`
 563     no_overwrites:     Same as `overwrites=False`
 564     """
 565
 566     _NUMERIC_FIELDS = {
 567         'width', 'height', 'asr', 'audio_channels', 'fps',
 568         'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
 569         'timestamp', 'release_timestamp',
 570         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 571         'average_rating', 'comment_count', 'age_limit',
 572         'start_time', 'end_time',
 573         'chapter_number', 'season_number', 'episode_number',
 574         'track_number', 'disc_number', 'release_year',
 575     }
 576
 577     _format_fields = {
 578         # NB: Keep in sync with the docstring of extractor/common.py
 579         'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
 580         'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
 581         'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
 582         'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data',
 583         'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
 584         'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'extra_param_to_key_url',
 585         'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version',
 586         'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
 587     }
 588     _deprecated_multivalue_fields = {
 589         'album_artist': 'album_artists',
 590         'artist': 'artists',
 591         'composer': 'composers',
 592         'creator': 'creators',
 593         'genre': 'genres',
 594     }
 595     _format_selection_exts = {
 596         'audio': set(MEDIA_EXTENSIONS.common_audio),
 597         'video': {*MEDIA_EXTENSIONS.common_video, '3gp'},
 598         'storyboards': set(MEDIA_EXTENSIONS.storyboards),
 599     }
 600
 601     def __init__(self, params=None, auto_init=True):
 602         """Create a FileDownloader object with the given options.
 603         @param auto_init    Whether to load the default extractors and print header (if verbose).
 604                             Set to 'no_verbose_header' to not print the header
 605         """
 606         if params is None:
 607             params = {}
 608         self.params = params
 609         self._ies = {}
 610         self._ies_instances = {}
 611         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 612         self._printed_messages = set()
 613         self._first_webpage_request = True
 614         self._post_hooks = []
 615         self._progress_hooks = []
 616         self._postprocessor_hooks = []
 617         self._download_retcode = 0
 618         self._num_downloads = 0
 619         self._num_videos = 0
 620         self._playlist_level = 0
 621         self._playlist_urls = set()
 622         self.cache = Cache(self)
 623         self.__header_cookies = []
 624
 625         stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
 626         self._out_files = Namespace(
 627             out=stdout,
 628             error=sys.stderr,
 629             screen=sys.stderr if self.params.get('quiet') else stdout,
 630             console=None if compat_os_name == 'nt' else next(
 631                 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None),
 632         )
 633
 634         try:
 635             windows_enable_vt_mode()
 636         except Exception as e:
 637             self.write_debug(f'Failed to enable VT mode: {e}')
 638
 639         if self.params.get('no_color'):
 640             if self.params.get('color') is not None:
 641                 self.params.setdefault('_warnings', []).append(
 642                     'Overwriting params from "color" with "no_color"')
 643             self.params['color'] = 'no_color'
 644
 645         term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
 646         no_color = bool(os.getenv('NO_COLOR'))
 647
 648         def process_color_policy(stream):
 649             stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
 650             policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
 651             if policy in ('auto', None):
 652                 if term_allow_color and supports_terminal_sequences(stream):
 653                     return 'no_color' if no_color else True
 654                 return False
 655             assert policy in ('always', 'never', 'no_color'), policy
 656             return {'always': True, 'never': False}.get(policy, policy)
 657
 658         self._allow_colors = Namespace(**{
 659             name: process_color_policy(stream)
 660             for name, stream in self._out_files.items_ if name != 'console'
 661         })
 662
 663         system_deprecation = _get_system_deprecation()
 664         if system_deprecation:
 665             self.deprecated_feature(system_deprecation.replace('\n', '\n                    '))
 666
 667         if self.params.get('allow_unplayable_formats'):
 668             self.report_warning(
 669                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 670                 'This is a developer option intended for debugging. \n'
 671                 '         If you experience any issues while using this option, '
 672                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 673
 674         if self.params.get('bidi_workaround', False):
 675             try:
 676                 import pty
 677                 master, slave = pty.openpty()
 678                 width = shutil.get_terminal_size().columns
 679                 width_args = [] if width is None else ['-w', str(width)]
 680                 sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
 681                 try:
 682                     self._output_process = Popen(['bidiv', *width_args], **sp_kwargs)
 683                 except OSError:
 684                     self._output_process = Popen(['fribidi', '-c', 'UTF-8', *width_args], **sp_kwargs)
 685                 self._output_channel = os.fdopen(master, 'rb')
 686             except OSError as ose:
 687                 if ose.errno == errno.ENOENT:
 688                     self.report_warning(
 689                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 690                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 691                 else:
 692                     raise
 693
 694         self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
 695         self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
 696         self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
 697         self.params['http_headers'].pop('Cookie', None)
 698
 699         if auto_init and auto_init != 'no_verbose_header':
 700             self.print_debug_header()
 701
 702         def check_deprecated(param, option, suggestion):
 703             if self.params.get(param) is not None:
 704                 self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
 705                 return True
 706             return False
 707
 708         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 709             if self.params.get('geo_verification_proxy') is None:
 710                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 711
 712         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 713         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 714         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 715
 716         for msg in self.params.get('_warnings', []):
 717             self.report_warning(msg)
 718         for msg in self.params.get('_deprecation_warnings', []):
 719             self.deprecated_feature(msg)
 720
 721         if impersonate_target := self.params.get('impersonate'):
 722             if not self._impersonate_target_available(impersonate_target):
 723                 raise YoutubeDLError(
 724                     f'Impersonate target "{impersonate_target}" is not available. '
 725                     f'Use --list-impersonate-targets to see available targets. '
 726                     f'You may be missing dependencies required to support this target.')
 727
 728         if 'list-formats' in self.params['compat_opts']:
 729             self.params['listformats_table'] = False
 730
 731         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 732             # nooverwrites was unnecessarily changed to overwrites
 733             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 734             # This ensures compatibility with both keys
 735             self.params['overwrites'] = not self.params['nooverwrites']
 736         elif self.params.get('overwrites') is None:
 737             self.params.pop('overwrites', None)
 738         else:
 739             self.params['nooverwrites'] = not self.params['overwrites']
 740
 741         if self.params.get('simulate') is None and any((
 742             self.params.get('list_thumbnails'),
 743             self.params.get('listformats'),
 744             self.params.get('listsubtitles'),
 745         )):
 746             self.params['simulate'] = 'list_only'
 747
 748         self.params.setdefault('forceprint', {})
 749         self.params.setdefault('print_to_file', {})
 750
 751         # Compatibility with older syntax
 752         if not isinstance(params['forceprint'], dict):
 753             self.params['forceprint'] = {'video': params['forceprint']}
 754
 755         if auto_init:
 756             self.add_default_info_extractors()
 757
 758         if (sys.platform != 'win32'
 759                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 760                 and not self.params.get('restrictfilenames', False)):
 761             # Unicode filesystem API will throw errors (#1474, #13027)
 762             self.report_warning(
 763                 'Assuming --restrict-filenames since file system encoding '
 764                 'cannot encode all characters. '
 765                 'Set the LC_ALL environment variable to fix this.')
 766             self.params['restrictfilenames'] = True
 767
 768         self._parse_outtmpl()
 769
 770         # Creating format selector here allows us to catch syntax errors before the extraction
 771         self.format_selector = (
 772             self.params.get('format') if self.params.get('format') in (None, '-')
 773             else self.params['format'] if callable(self.params['format'])
 774             else self.build_format_selector(self.params['format']))
 775
 776         hooks = {
 777             'post_hooks': self.add_post_hook,
 778             'progress_hooks': self.add_progress_hook,
 779             'postprocessor_hooks': self.add_postprocessor_hook,
 780         }
 781         for opt, fn in hooks.items():
 782             for ph in self.params.get(opt, []):
 783                 fn(ph)
 784
 785         for pp_def_raw in self.params.get('postprocessors', []):
 786             pp_def = dict(pp_def_raw)
 787             when = pp_def.pop('when', 'post_process')
 788             self.add_post_processor(
 789                 get_postprocessor(pp_def.pop('key'))(self, **pp_def),
 790                 when=when)
 791
 792         def preload_download_archive(fn):
 793             """Preload the archive, if any is specified"""
 794             archive = set()
 795             if fn is None:
 796                 return archive
 797             elif not is_path_like(fn):
 798                 return fn
 799
 800             self.write_debug(f'Loading archive file {fn!r}')
 801             try:
 802                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 803                     for line in archive_file:
 804                         archive.add(line.strip())
 805             except OSError as ioe:
 806                 if ioe.errno != errno.ENOENT:
 807                     raise
 808             return archive
 809
 810         self.archive = preload_download_archive(self.params.get('download_archive'))
 811
 812     def warn_if_short_id(self, argv):
 813         # short YouTube ID starting with dash?
 814         idxs = [
 815             i for i, a in enumerate(argv)
 816             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 817         if idxs:
 818             correct_argv = (
 819                 ['yt-dlp']
 820                 + [a for i, a in enumerate(argv) if i not in idxs]
 821                 + ['--'] + [argv[i] for i in idxs]
 822             )
 823             self.report_warning(
 824                 'Long argument string detected. '
 825                 f'Use -- to separate parameters and URLs, like this:\n{shell_quote(correct_argv)}')
 826
 827     def add_info_extractor(self, ie):
 828         """Add an InfoExtractor object to the end of the list."""
 829         ie_key = ie.ie_key()
 830         self._ies[ie_key] = ie
 831         if not isinstance(ie, type):
 832             self._ies_instances[ie_key] = ie
 833             ie.set_downloader(self)
 834
 835     def get_info_extractor(self, ie_key):
 836         """
 837         Get an instance of an IE with name ie_key, it will try to get one from
 838         the _ies list, if there's no instance it will create a new one and add
 839         it to the extractor list.
 840         """
 841         ie = self._ies_instances.get(ie_key)
 842         if ie is None:
 843             ie = get_info_extractor(ie_key)()
 844             self.add_info_extractor(ie)
 845         return ie
 846
 847     def add_default_info_extractors(self):
 848         """
 849         Add the InfoExtractors returned by gen_extractors to the end of the list
 850         """
 851         all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
 852         all_ies['end'] = UnsupportedURLIE()
 853         try:
 854             ie_names = orderedSet_from_options(
 855                 self.params.get('allowed_extractors', ['default']), {
 856                     'all': list(all_ies),
 857                     'default': [name for name, ie in all_ies.items() if ie._ENABLED],
 858                 }, use_regex=True)
 859         except re.error as e:
 860             raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
 861         for name in ie_names:
 862             self.add_info_extractor(all_ies[name])
 863         self.write_debug(f'Loaded {len(ie_names)} extractors')
 864
 865     def add_post_processor(self, pp, when='post_process'):
 866         """Add a PostProcessor object to the end of the chain."""
 867         assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
 868         self._pps[when].append(pp)
 869         pp.set_downloader(self)
 870
 871     def add_post_hook(self, ph):
 872         """Add the post hook"""
 873         self._post_hooks.append(ph)
 874
 875     def add_progress_hook(self, ph):
 876         """Add the download progress hook"""
 877         self._progress_hooks.append(ph)
 878
 879     def add_postprocessor_hook(self, ph):
 880         """Add the postprocessing progress hook"""
 881         self._postprocessor_hooks.append(ph)
 882         for pps in self._pps.values():
 883             for pp in pps:
 884                 pp.add_progress_hook(ph)
 885
 886     def _bidi_workaround(self, message):
 887         if not hasattr(self, '_output_channel'):
 888             return message
 889
 890         assert hasattr(self, '_output_process')
 891         assert isinstance(message, str)
 892         line_count = message.count('\n') + 1
 893         self._output_process.stdin.write((message + '\n').encode())
 894         self._output_process.stdin.flush()
 895         res = ''.join(self._output_channel.readline().decode()
 896                       for _ in range(line_count))
 897         return res[:-len('\n')]
 898
 899     def _write_string(self, message, out=None, only_once=False):
 900         if only_once:
 901             if message in self._printed_messages:
 902                 return
 903             self._printed_messages.add(message)
 904         write_string(message, out=out, encoding=self.params.get('encoding'))
 905
 906     def to_stdout(self, message, skip_eol=False, quiet=None):
 907         """Print message to stdout"""
 908         if quiet is not None:
 909             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
 910                                      'Use "YoutubeDL.to_screen" instead')
 911         if skip_eol is not False:
 912             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
 913                                      'Use "YoutubeDL.to_screen" instead')
 914         self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
 915
 916     def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
 917         """Print message to screen if not in quiet mode"""
 918         if self.params.get('logger'):
 919             self.params['logger'].debug(message)
 920             return
 921         if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
 922             return
 923         self._write_string(
 924             '{}{}'.format(self._bidi_workaround(message), ('' if skip_eol else '\n')),
 925             self._out_files.screen, only_once=only_once)
 926
 927     def to_stderr(self, message, only_once=False):
 928         """Print message to stderr"""
 929         assert isinstance(message, str)
 930         if self.params.get('logger'):
 931             self.params['logger'].error(message)
 932         else:
 933             self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
 934
 935     def _send_console_code(self, code):
 936         if compat_os_name == 'nt' or not self._out_files.console:
 937             return
 938         self._write_string(code, self._out_files.console)
 939
 940     def to_console_title(self, message):
 941         if not self.params.get('consoletitle', False):
 942             return
 943         message = remove_terminal_sequences(message)
 944         if compat_os_name == 'nt':
 945             if ctypes.windll.kernel32.GetConsoleWindow():
 946                 # c_wchar_p() might not be necessary if `message` is
 947                 # already of type unicode()
 948                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 949         else:
 950             self._send_console_code(f'\033]0;{message}\007')
 951
 952     def save_console_title(self):
 953         if not self.params.get('consoletitle') or self.params.get('simulate'):
 954             return
 955         self._send_console_code('\033[22;0t')  # Save the title on stack
 956
 957     def restore_console_title(self):
 958         if not self.params.get('consoletitle') or self.params.get('simulate'):
 959             return
 960         self._send_console_code('\033[23;0t')  # Restore the title from stack
 961
 962     def __enter__(self):
 963         self.save_console_title()
 964         return self
 965
 966     def save_cookies(self):
 967         if self.params.get('cookiefile') is not None:
 968             self.cookiejar.save()
 969
 970     def __exit__(self, *args):
 971         self.restore_console_title()
 972         self.close()
 973
 974     def close(self):
 975         self.save_cookies()
 976         if '_request_director' in self.__dict__:
 977             self._request_director.close()
 978             del self._request_director
 979
 980     def trouble(self, message=None, tb=None, is_error=True):
 981         """Determine action to take when a download problem appears.
 982
 983         Depending on if the downloader has been configured to ignore
 984         download errors or not, this method may throw an exception or
 985         not when errors are found, after printing the message.
 986
 987         @param tb          If given, is additional traceback information
 988         @param is_error    Whether to raise error according to ignorerrors
 989         """
 990         if message is not None:
 991             self.to_stderr(message)
 992         if self.params.get('verbose'):
 993             if tb is None:
 994                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 995                     tb = ''
 996                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 997                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 998                     tb += encode_compat_str(traceback.format_exc())
 999                 else:
1000                     tb_data = traceback.format_list(traceback.extract_stack())
1001                     tb = ''.join(tb_data)
1002             if tb:
1003                 self.to_stderr(tb)
1004         if not is_error:
1005             return
1006         if not self.params.get('ignoreerrors'):
1007             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
1008                 exc_info = sys.exc_info()[1].exc_info
1009             else:
1010                 exc_info = sys.exc_info()
1011             raise DownloadError(message, exc_info)
1012         self._download_retcode = 1
1013
1014     Styles = Namespace(
1015         HEADERS='yellow',
1016         EMPHASIS='light blue',
1017         FILENAME='green',
1018         ID='green',
1019         DELIM='blue',
1020         ERROR='red',
1021         BAD_FORMAT='light red',
1022         WARNING='yellow',
1023         SUPPRESS='light black',
1024     )
1025
1026     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
1027         text = str(text)
1028         if test_encoding:
1029             original_text = text
1030             # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
1031             encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
1032             text = text.encode(encoding, 'ignore').decode(encoding)
1033             if fallback is not None and text != original_text:
1034                 text = fallback
1035         return format_text(text, f) if allow_colors is True else text if fallback is None else fallback
1036
1037     def _format_out(self, *args, **kwargs):
1038         return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
1039
1040     def _format_screen(self, *args, **kwargs):
1041         return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
1042
1043     def _format_err(self, *args, **kwargs):
1044         return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
1045
1046     def report_warning(self, message, only_once=False):
1047         """
1048         Print the message to stderr, it will be prefixed with 'WARNING:'
1049         If stderr is a tty file the 'WARNING:' will be colored
1050         """
1051         if self.params.get('logger') is not None:
1052             self.params['logger'].warning(message)
1053         else:
1054             if self.params.get('no_warnings'):
1055                 return
1056             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
1057
1058     def deprecation_warning(self, message, *, stacklevel=0):
1059         deprecation_warning(
1060             message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)
1061
1062     def deprecated_feature(self, message):
1063         if self.params.get('logger') is not None:
1064             self.params['logger'].warning(f'Deprecated Feature: {message}')
1065         self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
1066
1067     def report_error(self, message, *args, **kwargs):
1068         """
1069         Do the same as trouble, but prefixes the message with 'ERROR:', colored
1070         in red if stderr is a tty file.
1071         """
1072         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
1073
1074     def write_debug(self, message, only_once=False):
1075         """Log debug message or Print message to stderr"""
1076         if not self.params.get('verbose', False):
1077             return
1078         message = f'[debug] {message}'
1079         if self.params.get('logger'):
1080             self.params['logger'].debug(message)
1081         else:
1082             self.to_stderr(message, only_once)
1083
1084     def report_file_already_downloaded(self, file_name):
1085         """Report file has already been fully downloaded."""
1086         try:
1087             self.to_screen(f'[download] {file_name} has already been downloaded')
1088         except UnicodeEncodeError:
1089             self.to_screen('[download] The file has already been downloaded')
1090
1091     def report_file_delete(self, file_name):
1092         """Report that existing file will be deleted."""
1093         try:
1094             self.to_screen(f'Deleting existing file {file_name}')
1095         except UnicodeEncodeError:
1096             self.to_screen('Deleting existing file')
1097
1098     def raise_no_formats(self, info, forced=False, *, msg=None):
1099         has_drm = info.get('_has_drm')
1100         ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
1101         msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
1102         if forced or not ignored:
1103             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
1104                                  expected=has_drm or ignored or expected)
1105         else:
1106             self.report_warning(msg)
1107
1108     def parse_outtmpl(self):
1109         self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
1110         self._parse_outtmpl()
1111         return self.params['outtmpl']
1112
1113     def _parse_outtmpl(self):
1114         sanitize = IDENTITY
1115         if self.params.get('restrictfilenames'):  # Remove spaces in the default template
1116             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
1117
1118         outtmpl = self.params.setdefault('outtmpl', {})
1119         if not isinstance(outtmpl, dict):
1120             self.params['outtmpl'] = outtmpl = {'default': outtmpl}
1121         outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})
1122
1123     def get_output_path(self, dir_type='', filename=None):
1124         paths = self.params.get('paths', {})
1125         assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
1126         path = os.path.join(
1127             expand_path(paths.get('home', '').strip()),
1128             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1129             filename or '')
1130         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1131
1132     @staticmethod
1133     def _outtmpl_expandpath(outtmpl):
1134         # expand_path translates '%%' into '%' and '$$' into '$'
1135         # correspondingly that is not what we want since we need to keep
1136         # '%%' intact for template dict substitution step. Working around
1137         # with boundary-alike separator hack.
1138         sep = ''.join(random.choices(string.ascii_letters, k=32))
1139         outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
1140
1141         # outtmpl should be expand_path'ed before template dict substitution
1142         # because meta fields may contain env variables we don't want to
1143         # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
1144         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1145         return expand_path(outtmpl).replace(sep, '')
1146
1147     @staticmethod
1148     def escape_outtmpl(outtmpl):
1149         """ Escape any remaining strings like %s, %abc% etc. """
1150         return re.sub(
1151             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1152             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1153             outtmpl)
1154
1155     @classmethod
1156     def validate_outtmpl(cls, outtmpl):
1157         """ @return None or Exception object """
1158         outtmpl = re.sub(
1159             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
1160             lambda mobj: f'{mobj.group(0)[:-1]}s',
1161             cls._outtmpl_expandpath(outtmpl))
1162         try:
1163             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1164             return None
1165         except ValueError as err:
1166             return err
1167
1168     @staticmethod
1169     def _copy_infodict(info_dict):
1170         info_dict = dict(info_dict)
1171         info_dict.pop('__postprocessors', None)
1172         info_dict.pop('__pending_error', None)
1173         return info_dict
1174
1175     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1176         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1177         @param sanitize    Whether to sanitize the output as a filename.
1178                            For backward compatibility, a function can also be passed
1179         """
1180
1181         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1182
1183         info_dict = self._copy_infodict(info_dict)
1184         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1185             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1186             if info_dict.get('duration', None) is not None
1187             else None)
1188         info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
1189         info_dict['video_autonumber'] = self._num_videos
1190         if info_dict.get('resolution') is None:
1191             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1192
1193         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1194         # of %(field)s to %(field)0Nd for backward compatibility
1195         field_size_compat_map = {
1196             'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
1197             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1198             'autonumber': self.params.get('autonumber_size') or 5,
1199         }
1200
1201         TMPL_DICT = {}
1202         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
1203         MATH_FUNCTIONS = {
1204             '+': float.__add__,
1205             '-': float.__sub__,
1206             '*': float.__mul__,
1207         }
1208         # Field is of the form key1.key2...
1209         # where keys (except first) can be string, int, slice or "{field, ...}"
1210         FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}  # noqa: UP031
1211         FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {  # noqa: UP031
1212             'inner': FIELD_INNER_RE,
1213             'field': rf'\w*(?:\.{FIELD_INNER_RE})*',
1214         }
1215         MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
1216         MATH_OPERATORS_RE = r'(?:{})'.format('|'.join(map(re.escape, MATH_FUNCTIONS.keys())))
1217         INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
1218             (?P<negate>-)?
1219             (?P<fields>{FIELD_RE})
1220             (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
1221             (?:>(?P<strf_format>.+?))?
1222             (?P<remaining>
1223                 (?P<alternate>(?<!\\),[^|&)]+)?
1224                 (?:&(?P<replacement>.*?))?
1225                 (?:\|(?P<default>.*?))?
1226             )$''')
1227
1228         def _from_user_input(field):
1229             if field == ':':
1230                 return ...
1231             elif ':' in field:
1232                 return slice(*map(int_or_none, field.split(':')))
1233             elif int_or_none(field) is not None:
1234                 return int(field)
1235             return field
1236
1237         def _traverse_infodict(fields):
1238             fields = [f for x in re.split(r'\.({.+?})\.?', fields)
1239                       for f in ([x] if x.startswith('{') else x.split('.'))]
1240             for i in (0, -1):
1241                 if fields and not fields[i]:
1242                     fields.pop(i)
1243
1244             for i, f in enumerate(fields):
1245                 if not f.startswith('{'):
1246                     fields[i] = _from_user_input(f)
1247                     continue
1248                 assert f.endswith('}'), f'No closing brace for {f} in {fields}'
1249                 fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}
1250
1251             return traverse_obj(info_dict, fields, traverse_string=True)
1252
1253         def get_value(mdict):
1254             # Object traversal
1255             value = _traverse_infodict(mdict['fields'])
1256             # Negative
1257             if mdict['negate']:
1258                 value = float_or_none(value)
1259                 if value is not None:
1260                     value *= -1
1261             # Do maths
1262             offset_key = mdict['maths']
1263             if offset_key:
1264                 value = float_or_none(value)
1265                 operator = None
1266                 while offset_key:
1267                     item = re.match(
1268                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1269                         offset_key).group(0)
1270                     offset_key = offset_key[len(item):]
1271                     if operator is None:
1272                         operator = MATH_FUNCTIONS[item]
1273                         continue
1274                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1275                     offset = float_or_none(item)
1276                     if offset is None:
1277                         offset = float_or_none(_traverse_infodict(item))
1278                     try:
1279                         value = operator(value, multiplier * offset)
1280                     except (TypeError, ZeroDivisionError):
1281                         return None
1282                     operator = None
1283             # Datetime formatting
1284             if mdict['strf_format']:
1285                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1286
1287             # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
1288             if sanitize and value == '':
1289                 value = None
1290             return value
1291
1292         na = self.params.get('outtmpl_na_placeholder', 'NA')
1293
1294         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1295             return sanitize_filename(str(value), restricted=restricted, is_id=(
1296                 bool(re.search(r'(^|[_.])id(\.|$)', key))
1297                 if 'filename-sanitization' in self.params['compat_opts']
1298                 else NO_DEFAULT))
1299
1300         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1301         sanitize = bool(sanitize)
1302
1303         def _dumpjson_default(obj):
1304             if isinstance(obj, (set, LazyList)):
1305                 return list(obj)
1306             return repr(obj)
1307
1308         class _ReplacementFormatter(string.Formatter):
1309             def get_field(self, field_name, args, kwargs):
1310                 if field_name.isdigit():
1311                     return args[0], -1
1312                 raise ValueError('Unsupported field')
1313
1314         replacement_formatter = _ReplacementFormatter()
1315
1316         def create_key(outer_mobj):
1317             if not outer_mobj.group('has_key'):
1318                 return outer_mobj.group(0)
1319             key = outer_mobj.group('key')
1320             mobj = re.match(INTERNAL_FORMAT_RE, key)
1321             value, replacement, default, last_field = None, None, na, ''
1322             while mobj:
1323                 mobj = mobj.groupdict()
1324                 default = mobj['default'] if mobj['default'] is not None else default
1325                 value = get_value(mobj)
1326                 last_field, replacement = mobj['fields'], mobj['replacement']
1327                 if value is None and mobj['alternate']:
1328                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
1329                 else:
1330                     break
1331
1332             if None not in (value, replacement):
1333                 try:
1334                     value = replacement_formatter.format(replacement, value)
1335                 except ValueError:
1336                     value, default = None, na
1337
1338             fmt = outer_mobj.group('format')
1339             if fmt == 's' and last_field in field_size_compat_map and isinstance(value, int):
1340                 fmt = f'0{field_size_compat_map[last_field]:d}d'
1341
1342             flags = outer_mobj.group('conversion') or ''
1343             str_fmt = f'{fmt[:-1]}s'
1344             if value is None:
1345                 value, fmt = default, 's'
1346             elif fmt[-1] == 'l':  # list
1347                 delim = '\n' if '#' in flags else ', '
1348                 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1349             elif fmt[-1] == 'j':  # json
1350                 value, fmt = json.dumps(
1351                     value, default=_dumpjson_default,
1352                     indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
1353             elif fmt[-1] == 'h':  # html
1354                 value, fmt = escapeHTML(str(value)), str_fmt
1355             elif fmt[-1] == 'q':  # quoted
1356                 value = map(str, variadic(value) if '#' in flags else [value])
1357                 value, fmt = shell_quote(value, shell=True), str_fmt
1358             elif fmt[-1] == 'B':  # bytes
1359                 value = f'%{str_fmt}'.encode() % str(value).encode()
1360                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1361             elif fmt[-1] == 'U':  # unicode normalized
1362                 value, fmt = unicodedata.normalize(
1363                     # "+" = compatibility equivalence, "#" = NFD
1364                     'NF{}{}'.format('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1365                     value), str_fmt
1366             elif fmt[-1] == 'D':  # decimal suffix
1367                 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1368                 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1369                                               factor=1024 if '#' in flags else 1000)
1370             elif fmt[-1] == 'S':  # filename sanitization
1371                 value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
1372             elif fmt[-1] == 'c':
1373                 if value:
1374                     value = str(value)[0]
1375                 else:
1376                     fmt = str_fmt
1377             elif fmt[-1] not in 'rsa':  # numeric
1378                 value = float_or_none(value)
1379                 if value is None:
1380                     value, fmt = default, 's'
1381
1382             if sanitize:
1383                 # If value is an object, sanitize might convert it to a string
1384                 # So we convert it to repr first
1385                 if fmt[-1] == 'r':
1386                     value, fmt = repr(value), str_fmt
1387                 elif fmt[-1] == 'a':
1388                     value, fmt = ascii(value), str_fmt
1389                 if fmt[-1] in 'csra':
1390                     value = sanitizer(last_field, value)
1391
1392             key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
1393             TMPL_DICT[key] = value
1394             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1395
1396         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1397
1398     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1399         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1400         return self.escape_outtmpl(outtmpl) % info_dict
1401
1402     def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1403         assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1404         if outtmpl is None:
1405             outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1406         try:
1407             outtmpl = self._outtmpl_expandpath(outtmpl)
1408             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1409             if not filename:
1410                 return None
1411
1412             if tmpl_type in ('', 'temp'):
1413                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1414                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1415                     filename = replace_extension(filename, ext, final_ext)
1416             elif tmpl_type:
1417                 force_ext = OUTTMPL_TYPES[tmpl_type]
1418                 if force_ext:
1419                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1420
1421             # https://github.com/blackjack4494/youtube-dlc/issues/85
1422             trim_file_name = self.params.get('trim_file_name', False)
1423             if trim_file_name:
1424                 no_ext, *ext = filename.rsplit('.', 2)
1425                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1426
1427             return filename
1428         except ValueError as err:
1429             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1430             return None
1431
1432     def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1433         """Generate the output filename"""
1434         if outtmpl:
1435             assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1436             dir_type = None
1437         filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1438         if not filename and dir_type not in ('', 'temp'):
1439             return ''
1440
1441         if warn:
1442             if not self.params.get('paths'):
1443                 pass
1444             elif filename == '-':
1445                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1446             elif os.path.isabs(filename):
1447                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1448         if filename == '-' or not filename:
1449             return filename
1450
1451         return self.get_output_path(dir_type, filename)
1452
1453     def _match_entry(self, info_dict, incomplete=False, silent=False):
1454         """Returns None if the file should be downloaded"""
1455         _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
1456         assert incomplete or _type == 'video', 'Only video result can be considered complete'
1457
1458         video_title = info_dict.get('title', info_dict.get('id', 'entry'))
1459
1460         def check_filter():
1461             if _type in ('playlist', 'multi_video'):
1462                 return
1463             elif _type in ('url', 'url_transparent') and not try_call(
1464                     lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1465                 return
1466
1467             if 'title' in info_dict:
1468                 # This can happen when we're just evaluating the playlist
1469                 title = info_dict['title']
1470                 matchtitle = self.params.get('matchtitle', False)
1471                 if matchtitle:
1472                     if not re.search(matchtitle, title, re.IGNORECASE):
1473                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1474                 rejecttitle = self.params.get('rejecttitle', False)
1475                 if rejecttitle:
1476                     if re.search(rejecttitle, title, re.IGNORECASE):
1477                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1478
1479             date = info_dict.get('upload_date')
1480             if date is not None:
1481                 date_range = self.params.get('daterange', DateRange())
1482                 if date not in date_range:
1483                     return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}'
1484             view_count = info_dict.get('view_count')
1485             if view_count is not None:
1486                 min_views = self.params.get('min_views')
1487                 if min_views is not None and view_count < min_views:
1488                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1489                 max_views = self.params.get('max_views')
1490                 if max_views is not None and view_count > max_views:
1491                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1492             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1493                 return f'Skipping "{video_title}" because it is age restricted'
1494
1495             match_filter = self.params.get('match_filter')
1496             if match_filter is None:
1497                 return None
1498
1499             cancelled = None
1500             try:
1501                 try:
1502                     ret = match_filter(info_dict, incomplete=incomplete)
1503                 except TypeError:
1504                     # For backward compatibility
1505                     ret = None if incomplete else match_filter(info_dict)
1506             except DownloadCancelled as err:
1507                 if err.msg is not NO_DEFAULT:
1508                     raise
1509                 ret, cancelled = err.msg, err
1510
1511             if ret is NO_DEFAULT:
1512                 while True:
1513                     filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1514                     reply = input(self._format_screen(
1515                         f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1516                     if reply in {'y', ''}:
1517                         return None
1518                     elif reply == 'n':
1519                         if cancelled:
1520                             raise type(cancelled)(f'Skipping {video_title}')
1521                         return f'Skipping {video_title}'
1522             return ret
1523
1524         if self.in_download_archive(info_dict):
1525             reason = ''.join((
1526                 format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
1527                 format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
1528                 'has already been recorded in the archive'))
1529             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1530         else:
1531             try:
1532                 reason = check_filter()
1533             except DownloadCancelled as e:
1534                 reason, break_opt, break_err = e.msg, 'match_filter', type(e)
1535             else:
1536                 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1537         if reason is not None:
1538             if not silent:
1539                 self.to_screen('[download] ' + reason)
1540             if self.params.get(break_opt, False):
1541                 raise break_err()
1542         return reason
1543
1544     @staticmethod
1545     def add_extra_info(info_dict, extra_info):
1546         """Set the keys from extra_info in info dict if they are missing"""
1547         for key, value in extra_info.items():
1548             info_dict.setdefault(key, value)
1549
1550     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1551                      process=True, force_generic_extractor=False):
1552         """
1553         Extract and return the information dictionary of the URL
1554
1555         Arguments:
1556         @param url          URL to extract
1557
1558         Keyword arguments:
1559         @param download     Whether to download videos
1560         @param process      Whether to resolve all unresolved references (URLs, playlist items).
1561                             Must be True for download to work
1562         @param ie_key       Use only the extractor with this key
1563
1564         @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
1565         @force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')
1566         """
1567
1568         if extra_info is None:
1569             extra_info = {}
1570
1571         if not ie_key and force_generic_extractor:
1572             ie_key = 'Generic'
1573
1574         if ie_key:
1575             ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1576         else:
1577             ies = self._ies
1578
1579         for key, ie in ies.items():
1580             if not ie.suitable(url):
1581                 continue
1582
1583             if not ie.working():
1584                 self.report_warning('The program functionality for this site has been marked as broken, '
1585                                     'and will probably not work.')
1586
1587             temp_id = ie.get_temp_id(url)
1588             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1589                 self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
1590                                'has already been recorded in the archive')
1591                 if self.params.get('break_on_existing', False):
1592                     raise ExistingVideoReached
1593                 break
1594             return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1595         else:
1596             extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1597             self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1598                               tb=False if extractors_restricted else None)
1599
1600     def _handle_extraction_exceptions(func):
1601         @functools.wraps(func)
1602         def wrapper(self, *args, **kwargs):
1603             while True:
1604                 try:
1605                     return func(self, *args, **kwargs)
1606                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1607                     raise
1608                 except ReExtractInfo as e:
1609                     if e.expected:
1610                         self.to_screen(f'{e}; Re-extracting data')
1611                     else:
1612                         self.to_stderr('\r')
1613                         self.report_warning(f'{e}; Re-extracting data')
1614                     continue
1615                 except GeoRestrictedError as e:
1616                     msg = e.msg
1617                     if e.countries:
1618                         msg += '\nThis video is available in {}.'.format(', '.join(
1619                             map(ISO3166Utils.short2full, e.countries)))
1620                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1621                     self.report_error(msg)
1622                 except ExtractorError as e:  # An error we somewhat expected
1623                     self.report_error(str(e), e.format_traceback())
1624                 except Exception as e:
1625                     if self.params.get('ignoreerrors'):
1626                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1627                     else:
1628                         raise
1629                 break
1630         return wrapper
1631
1632     def _wait_for_video(self, ie_result={}):
1633         if (not self.params.get('wait_for_video')
1634                 or ie_result.get('_type', 'video') != 'video'
1635                 or ie_result.get('formats') or ie_result.get('url')):
1636             return
1637
1638         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1639         last_msg = ''
1640
1641         def progress(msg):
1642             nonlocal last_msg
1643             full_msg = f'{msg}\n'
1644             if not self.params.get('noprogress'):
1645                 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1646             elif last_msg:
1647                 return
1648             self.to_screen(full_msg, skip_eol=True)
1649             last_msg = msg
1650
1651         min_wait, max_wait = self.params.get('wait_for_video')
1652         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1653         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1654             diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1655             self.report_warning('Release time of video is not known')
1656         elif ie_result and (diff or 0) <= 0:
1657             self.report_warning('Video should already be available according to extracted info')
1658         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1659         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1660
1661         wait_till = time.time() + diff
1662         try:
1663             while True:
1664                 diff = wait_till - time.time()
1665                 if diff <= 0:
1666                     progress('')
1667                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1668                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1669                 time.sleep(1)
1670         except KeyboardInterrupt:
1671             progress('')
1672             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1673         except BaseException as e:
1674             if not isinstance(e, ReExtractInfo):
1675                 self.to_screen('')
1676             raise
1677
1678     def _load_cookies(self, data, *, autoscope=True):
1679         """Loads cookies from a `Cookie` header
1680
1681         This tries to work around the security vulnerability of passing cookies to every domain.
1682         See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1683
1684         @param data         The Cookie header as string to load the cookies from
1685         @param autoscope    If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
1686                             If `True`, save cookies for later to be stored in the jar with a limited scope
1687                             If a URL, save cookies in the jar with the domain of the URL
1688         """
1689         for cookie in LenientSimpleCookie(data).values():
1690             if autoscope and any(cookie.values()):
1691                 raise ValueError('Invalid syntax in Cookie Header')
1692
1693             domain = cookie.get('domain') or ''
1694             expiry = cookie.get('expires')
1695             if expiry == '':  # 0 is valid
1696                 expiry = None
1697             prepared_cookie = http.cookiejar.Cookie(
1698                 cookie.get('version') or 0, cookie.key, cookie.value, None, False,
1699                 domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
1700                 cookie.get('secure') or False, expiry, False, None, None, {})
1701
1702             if domain:
1703                 self.cookiejar.set_cookie(prepared_cookie)
1704             elif autoscope is True:
1705                 self.deprecated_feature(
1706                     'Passing cookies as a header is a potential security risk; '
1707                     'they will be scoped to the domain of the downloaded urls. '
1708                     'Please consider loading cookies from a file or browser instead.')
1709                 self.__header_cookies.append(prepared_cookie)
1710             elif autoscope:
1711                 self.report_warning(
1712                     'The extractor result contains an unscoped cookie as an HTTP header. '
1713                     f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
1714                     only_once=True)
1715                 self._apply_header_cookies(autoscope, [prepared_cookie])
1716             else:
1717                 self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1718                                   tb=False, is_error=False)
1719
1720     def _apply_header_cookies(self, url, cookies=None):
1721         """Applies stray header cookies to the provided url
1722
1723         This loads header cookies and scopes them to the domain provided in `url`.
1724         While this is not ideal, it helps reduce the risk of them being sent
1725         to an unintended destination while mostly maintaining compatibility.
1726         """
1727         parsed = urllib.parse.urlparse(url)
1728         if not parsed.hostname:
1729             return
1730
1731         for cookie in map(copy.copy, cookies or self.__header_cookies):
1732             cookie.domain = f'.{parsed.hostname}'
1733             self.cookiejar.set_cookie(cookie)
1734
1735     @_handle_extraction_exceptions
1736     def __extract_info(self, url, ie, download, extra_info, process):
1737         self._apply_header_cookies(url)
1738
1739         try:
1740             ie_result = ie.extract(url)
1741         except UserNotLive as e:
1742             if process:
1743                 if self.params.get('wait_for_video'):
1744                     self.report_warning(e)
1745                 self._wait_for_video()
1746             raise
1747         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1748             self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1749             return
1750         if isinstance(ie_result, list):
1751             # Backwards compatibility: old IE result format
1752             ie_result = {
1753                 '_type': 'compat_list',
1754                 'entries': ie_result,
1755             }
1756         if extra_info.get('original_url'):
1757             ie_result.setdefault('original_url', extra_info['original_url'])
1758         self.add_default_extra_info(ie_result, ie, url)
1759         if process:
1760             self._wait_for_video(ie_result)
1761             return self.process_ie_result(ie_result, download, extra_info)
1762         else:
1763             return ie_result
1764
1765     def add_default_extra_info(self, ie_result, ie, url):
1766         if url is not None:
1767             self.add_extra_info(ie_result, {
1768                 'webpage_url': url,
1769                 'original_url': url,
1770             })
1771         webpage_url = ie_result.get('webpage_url')
1772         if webpage_url:
1773             self.add_extra_info(ie_result, {
1774                 'webpage_url_basename': url_basename(webpage_url),
1775                 'webpage_url_domain': get_domain(webpage_url),
1776             })
1777         if ie is not None:
1778             self.add_extra_info(ie_result, {
1779                 'extractor': ie.IE_NAME,
1780                 'extractor_key': ie.ie_key(),
1781             })
1782
1783     def process_ie_result(self, ie_result, download=True, extra_info=None):
1784         """
1785         Take the result of the ie(may be modified) and resolve all unresolved
1786         references (URLs, playlist items).
1787
1788         It will also download the videos if 'download'.
1789         Returns the resolved ie_result.
1790         """
1791         if extra_info is None:
1792             extra_info = {}
1793         result_type = ie_result.get('_type', 'video')
1794
1795         if result_type in ('url', 'url_transparent'):
1796             ie_result['url'] = sanitize_url(
1797                 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1798             if ie_result.get('original_url') and not extra_info.get('original_url'):
1799                 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1800
1801             extract_flat = self.params.get('extract_flat', False)
1802             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1803                     or extract_flat is True):
1804                 info_copy = ie_result.copy()
1805                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1806                 if ie and not ie_result.get('id'):
1807                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1808                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1809                 self.add_extra_info(info_copy, extra_info)
1810                 info_copy, _ = self.pre_process(info_copy)
1811                 self._fill_common_fields(info_copy, False)
1812                 self.__forced_printings(info_copy)
1813                 self._raise_pending_errors(info_copy)
1814                 if self.params.get('force_write_download_archive', False):
1815                     self.record_download_archive(info_copy)
1816                 return ie_result
1817
1818         if result_type == 'video':
1819             self.add_extra_info(ie_result, extra_info)
1820             ie_result = self.process_video_result(ie_result, download=download)
1821             self._raise_pending_errors(ie_result)
1822             additional_urls = (ie_result or {}).get('additional_urls')
1823             if additional_urls:
1824                 # TODO: Improve MetadataParserPP to allow setting a list
1825                 if isinstance(additional_urls, str):
1826                     additional_urls = [additional_urls]
1827                 self.to_screen(
1828                     '[info] {}: {} additional URL(s) requested'.format(ie_result['id'], len(additional_urls)))
1829                 self.write_debug('Additional URLs: "{}"'.format('", "'.join(additional_urls)))
1830                 ie_result['additional_entries'] = [
1831                     self.extract_info(
1832                         url, download, extra_info=extra_info,
1833                         force_generic_extractor=self.params.get('force_generic_extractor'))
1834                     for url in additional_urls
1835                 ]
1836             return ie_result
1837         elif result_type == 'url':
1838             # We have to add extra_info to the results because it may be
1839             # contained in a playlist
1840             return self.extract_info(
1841                 ie_result['url'], download,
1842                 ie_key=ie_result.get('ie_key'),
1843                 extra_info=extra_info)
1844         elif result_type == 'url_transparent':
1845             # Use the information from the embedding page
1846             info = self.extract_info(
1847                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1848                 extra_info=extra_info, download=False, process=False)
1849
1850             # extract_info may return None when ignoreerrors is enabled and
1851             # extraction failed with an error, don't crash and return early
1852             # in this case
1853             if not info:
1854                 return info
1855
1856             exempted_fields = {'_type', 'url', 'ie_key'}
1857             if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1858                 # For video clips, the id etc of the clip extractor should be used
1859                 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1860
1861             new_result = info.copy()
1862             new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1863
1864             # Extracted info may not be a video result (i.e.
1865             # info.get('_type', 'video') != video) but rather an url or
1866             # url_transparent. In such cases outer metadata (from ie_result)
1867             # should be propagated to inner one (info). For this to happen
1868             # _type of info should be overridden with url_transparent. This
1869             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1870             if new_result.get('_type') == 'url':
1871                 new_result['_type'] = 'url_transparent'
1872
1873             return self.process_ie_result(
1874                 new_result, download=download, extra_info=extra_info)
1875         elif result_type in ('playlist', 'multi_video'):
1876             # Protect from infinite recursion due to recursively nested playlists
1877             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1878             webpage_url = ie_result.get('webpage_url')  # Playlists maynot have webpage_url
1879             if webpage_url and webpage_url in self._playlist_urls:
1880                 self.to_screen(
1881                     '[download] Skipping already downloaded playlist: {}'.format(
1882                         ie_result.get('title')) or ie_result.get('id'))
1883                 return
1884
1885             self._playlist_level += 1
1886             self._playlist_urls.add(webpage_url)
1887             self._fill_common_fields(ie_result, False)
1888             self._sanitize_thumbnails(ie_result)
1889             try:
1890                 return self.__process_playlist(ie_result, download)
1891             finally:
1892                 self._playlist_level -= 1
1893                 if not self._playlist_level:
1894                     self._playlist_urls.clear()
1895         elif result_type == 'compat_list':
1896             self.report_warning(
1897                 'Extractor {} returned a compat_list result. '
1898                 'It needs to be updated.'.format(ie_result.get('extractor')))
1899
1900             def _fixup(r):
1901                 self.add_extra_info(r, {
1902                     'extractor': ie_result['extractor'],
1903                     'webpage_url': ie_result['webpage_url'],
1904                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1905                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1906                     'extractor_key': ie_result['extractor_key'],
1907                 })
1908                 return r
1909             ie_result['entries'] = [
1910                 self.process_ie_result(_fixup(r), download, extra_info)
1911                 for r in ie_result['entries']
1912             ]
1913             return ie_result
1914         else:
1915             raise Exception(f'Invalid result type: {result_type}')
1916
1917     def _ensure_dir_exists(self, path):
1918         return make_dir(path, self.report_error)
1919
1920     @staticmethod
1921     def _playlist_infodict(ie_result, strict=False, **kwargs):
1922         info = {
1923             'playlist_count': ie_result.get('playlist_count'),
1924             'playlist': ie_result.get('title') or ie_result.get('id'),
1925             'playlist_id': ie_result.get('id'),
1926             'playlist_title': ie_result.get('title'),
1927             'playlist_uploader': ie_result.get('uploader'),
1928             'playlist_uploader_id': ie_result.get('uploader_id'),
1929             **kwargs,
1930         }
1931         if strict:
1932             return info
1933         if ie_result.get('webpage_url'):
1934             info.update({
1935                 'webpage_url': ie_result['webpage_url'],
1936                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1937                 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1938             })
1939         return {
1940             **info,
1941             'playlist_index': 0,
1942             '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1943             'extractor': ie_result['extractor'],
1944             'extractor_key': ie_result['extractor_key'],
1945         }
1946
1947     def __process_playlist(self, ie_result, download):
1948         """Process each entry in the playlist"""
1949         assert ie_result['_type'] in ('playlist', 'multi_video')
1950
1951         common_info = self._playlist_infodict(ie_result, strict=True)
1952         title = common_info.get('playlist') or '<Untitled>'
1953         if self._match_entry(common_info, incomplete=True) is not None:
1954             return
1955         self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
1956
1957         all_entries = PlaylistEntries(self, ie_result)
1958         entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1959
1960         lazy = self.params.get('lazy_playlist')
1961         if lazy:
1962             resolved_entries, n_entries = [], 'N/A'
1963             ie_result['requested_entries'], ie_result['entries'] = None, None
1964         else:
1965             entries = resolved_entries = list(entries)
1966             n_entries = len(resolved_entries)
1967             ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1968         if not ie_result.get('playlist_count'):
1969             # Better to do this after potentially exhausting entries
1970             ie_result['playlist_count'] = all_entries.get_full_count()
1971
1972         extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1973         ie_copy = collections.ChainMap(ie_result, extra)
1974
1975         _infojson_written = False
1976         write_playlist_files = self.params.get('allow_playlist_files', True)
1977         if write_playlist_files and self.params.get('list_thumbnails'):
1978             self.list_thumbnails(ie_result)
1979         if write_playlist_files and not self.params.get('simulate'):
1980             _infojson_written = self._write_info_json(
1981                 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1982             if _infojson_written is None:
1983                 return
1984             if self._write_description('playlist', ie_result,
1985                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1986                 return
1987             # TODO: This should be passed to ThumbnailsConvertor if necessary
1988             self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1989
1990         if lazy:
1991             if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1992                 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1993         elif self.params.get('playlistreverse'):
1994             entries.reverse()
1995         elif self.params.get('playlistrandom'):
1996             random.shuffle(entries)
1997
1998         self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
1999                        f'{format_field(ie_result, "playlist_count", " of %s")}')
2000
2001         keep_resolved_entries = self.params.get('extract_flat') != 'discard'
2002         if self.params.get('extract_flat') == 'discard_in_playlist':
2003             keep_resolved_entries = ie_result['_type'] != 'playlist'
2004         if keep_resolved_entries:
2005             self.write_debug('The information of all playlist entries will be held in memory')
2006
2007         failures = 0
2008         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
2009         for i, (playlist_index, entry) in enumerate(entries):
2010             if lazy:
2011                 resolved_entries.append((playlist_index, entry))
2012             if not entry:
2013                 continue
2014
2015             entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
2016             if not lazy and 'playlist-index' in self.params['compat_opts']:
2017                 playlist_index = ie_result['requested_entries'][i]
2018
2019             entry_copy = collections.ChainMap(entry, {
2020                 **common_info,
2021                 'n_entries': int_or_none(n_entries),
2022                 'playlist_index': playlist_index,
2023                 'playlist_autonumber': i + 1,
2024             })
2025
2026             if self._match_entry(entry_copy, incomplete=True) is not None:
2027                 # For compatabilty with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
2028                 resolved_entries[i] = (playlist_index, NO_DEFAULT)
2029                 continue
2030
2031             self.to_screen(
2032                 f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} '
2033                 f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}')
2034
2035             entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
2036                 'playlist_index': playlist_index,
2037                 'playlist_autonumber': i + 1,
2038             }, extra))
2039             if not entry_result:
2040                 failures += 1
2041             if failures >= max_failures:
2042                 self.report_error(
2043                     f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
2044                 break
2045             if keep_resolved_entries:
2046                 resolved_entries[i] = (playlist_index, entry_result)
2047
2048         # Update with processed data
2049         ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
2050         ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
2051         if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
2052             # Do not set for full playlist
2053             ie_result.pop('requested_entries')
2054
2055         # Write the updated info to json
2056         if _infojson_written is True and self._write_info_json(
2057                 'updated playlist', ie_result,
2058                 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
2059             return
2060
2061         ie_result = self.run_all_pps('playlist', ie_result)
2062         self.to_screen(f'[download] Finished downloading playlist: {title}')
2063         return ie_result
2064
2065     @_handle_extraction_exceptions
2066     def __process_iterable_entry(self, entry, download, extra_info):
2067         return self.process_ie_result(
2068             entry, download=download, extra_info=extra_info)
2069
2070     def _build_format_filter(self, filter_spec):
2071         " Returns a function to filter the formats according to the filter_spec "
2072
2073         OPERATORS = {
2074             '<': operator.lt,
2075             '<=': operator.le,
2076             '>': operator.gt,
2077             '>=': operator.ge,
2078             '=': operator.eq,
2079             '!=': operator.ne,
2080         }
2081         operator_rex = re.compile(r'''(?x)\s*
2082             (?P<key>[\w.-]+)\s*
2083             (?P<op>{})(?P<none_inclusive>\s*\?)?\s*
2084             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2085             '''.format('|'.join(map(re.escape, OPERATORS.keys()))))
2086         m = operator_rex.fullmatch(filter_spec)
2087         if m:
2088             try:
2089                 comparison_value = int(m.group('value'))
2090             except ValueError:
2091                 comparison_value = parse_filesize(m.group('value'))
2092                 if comparison_value is None:
2093                     comparison_value = parse_filesize(m.group('value') + 'B')
2094                 if comparison_value is None:
2095                     raise ValueError(
2096                         'Invalid value {!r} in format specification {!r}'.format(
2097                             m.group('value'), filter_spec))
2098             op = OPERATORS[m.group('op')]
2099
2100         if not m:
2101             STR_OPERATORS = {
2102                 '=': operator.eq,
2103                 '^=': lambda attr, value: attr.startswith(value),
2104                 '$=': lambda attr, value: attr.endswith(value),
2105                 '*=': lambda attr, value: value in attr,
2106                 '~=': lambda attr, value: value.search(attr) is not None,
2107             }
2108             str_operator_rex = re.compile(r'''(?x)\s*
2109                 (?P<key>[a-zA-Z0-9._-]+)\s*
2110                 (?P<negation>!\s*)?(?P<op>{})\s*(?P<none_inclusive>\?\s*)?
2111                 (?P<quote>["'])?
2112                 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2113                 (?(quote)(?P=quote))\s*
2114                 '''.format('|'.join(map(re.escape, STR_OPERATORS.keys()))))
2115             m = str_operator_rex.fullmatch(filter_spec)
2116             if m:
2117                 if m.group('op') == '~=':
2118                     comparison_value = re.compile(m.group('value'))
2119                 else:
2120                     comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2121                 str_op = STR_OPERATORS[m.group('op')]
2122                 if m.group('negation'):
2123                     op = lambda attr, value: not str_op(attr, value)
2124                 else:
2125                     op = str_op
2126
2127         if not m:
2128             raise SyntaxError(f'Invalid filter specification {filter_spec!r}')
2129
2130         def _filter(f):
2131             actual_value = f.get(m.group('key'))
2132             if actual_value is None:
2133                 return m.group('none_inclusive')
2134             return op(actual_value, comparison_value)
2135         return _filter
2136
2137     def _check_formats(self, formats):
2138         for f in formats:
2139             working = f.get('__working')
2140             if working is not None:
2141                 if working:
2142                     yield f
2143                 continue
2144             self.to_screen('[info] Testing format {}'.format(f['format_id']))
2145             path = self.get_output_path('temp')
2146             if not self._ensure_dir_exists(f'{path}/'):
2147                 continue
2148             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2149             temp_file.close()
2150             try:
2151                 success, _ = self.dl(temp_file.name, f, test=True)
2152             except (DownloadError, OSError, ValueError, *network_exceptions):
2153                 success = False
2154             finally:
2155                 if os.path.exists(temp_file.name):
2156                     try:
2157                         os.remove(temp_file.name)
2158                     except OSError:
2159                         self.report_warning(f'Unable to delete temporary file "{temp_file.name}"')
2160             f['__working'] = success
2161             if success:
2162                 yield f
2163             else:
2164                 self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id']))
2165
2166     def _select_formats(self, formats, selector):
2167         return list(selector({
2168             'formats': formats,
2169             'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2170             'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
2171                                    or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
2172         }))
2173
2174     def _default_format_spec(self, info_dict, download=True):
2175         download = download and not self.params.get('simulate')
2176         prefer_best = download and (
2177             self.params['outtmpl']['default'] == '-'
2178             or info_dict.get('is_live') and not self.params.get('live_from_start'))
2179
2180         def can_merge():
2181             merger = FFmpegMergerPP(self)
2182             return merger.available and merger.can_merge()
2183
2184         if not prefer_best and download and not can_merge():
2185             prefer_best = True
2186             formats = self._get_formats(info_dict)
2187             evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
2188             if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):
2189                 self.report_warning('ffmpeg not found. The downloaded format may not be the best available. '
2190                                     'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')
2191
2192         compat = (self.params.get('allow_multiple_audio_streams')
2193                   or 'format-spec' in self.params['compat_opts'])
2194
2195         return ('best/bestvideo+bestaudio' if prefer_best
2196                 else 'bestvideo+bestaudio/best' if compat
2197                 else 'bestvideo*+bestaudio/best')
2198
2199     def build_format_selector(self, format_spec):
2200         def syntax_error(note, start):
2201             message = (
2202                 'Invalid format specification: '
2203                 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2204             return SyntaxError(message)
2205
2206         PICKFIRST = 'PICKFIRST'
2207         MERGE = 'MERGE'
2208         SINGLE = 'SINGLE'
2209         GROUP = 'GROUP'
2210         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2211
2212         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2213                                   'video': self.params.get('allow_multiple_video_streams', False)}
2214
2215         def _parse_filter(tokens):
2216             filter_parts = []
2217             for type_, string_, _start, _, _ in tokens:
2218                 if type_ == tokenize.OP and string_ == ']':
2219                     return ''.join(filter_parts)
2220                 else:
2221                     filter_parts.append(string_)
2222
2223         def _remove_unused_ops(tokens):
2224             # Remove operators that we don't use and join them with the surrounding strings.
2225             # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2226             ALLOWED_OPS = ('/', '+', ',', '(', ')')
2227             last_string, last_start, last_end, last_line = None, None, None, None
2228             for type_, string_, start, end, line in tokens:
2229                 if type_ == tokenize.OP and string_ == '[':
2230                     if last_string:
2231                         yield tokenize.NAME, last_string, last_start, last_end, last_line
2232                         last_string = None
2233                     yield type_, string_, start, end, line
2234                     # everything inside brackets will be handled by _parse_filter
2235                     for type_, string_, start, end, line in tokens:
2236                         yield type_, string_, start, end, line
2237                         if type_ == tokenize.OP and string_ == ']':
2238                             break
2239                 elif type_ == tokenize.OP and string_ in ALLOWED_OPS:
2240                     if last_string:
2241                         yield tokenize.NAME, last_string, last_start, last_end, last_line
2242                         last_string = None
2243                     yield type_, string_, start, end, line
2244                 elif type_ in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2245                     if not last_string:
2246                         last_string = string_
2247                         last_start = start
2248                         last_end = end
2249                     else:
2250                         last_string += string_
2251             if last_string:
2252                 yield tokenize.NAME, last_string, last_start, last_end, last_line
2253
2254         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2255             selectors = []
2256             current_selector = None
2257             for type_, string_, start, _, _ in tokens:
2258                 # ENCODING is only defined in Python 3.x
2259                 if type_ == getattr(tokenize, 'ENCODING', None):
2260                     continue
2261                 elif type_ in [tokenize.NAME, tokenize.NUMBER]:
2262                     current_selector = FormatSelector(SINGLE, string_, [])
2263                 elif type_ == tokenize.OP:
2264                     if string_ == ')':
2265                         if not inside_group:
2266                             # ')' will be handled by the parentheses group
2267                             tokens.restore_last_token()
2268                         break
2269                     elif inside_merge and string_ in ['/', ',']:
2270                         tokens.restore_last_token()
2271                         break
2272                     elif inside_choice and string_ == ',':
2273                         tokens.restore_last_token()
2274                         break
2275                     elif string_ == ',':
2276                         if not current_selector:
2277                             raise syntax_error('"," must follow a format selector', start)
2278                         selectors.append(current_selector)
2279                         current_selector = None
2280                     elif string_ == '/':
2281                         if not current_selector:
2282                             raise syntax_error('"/" must follow a format selector', start)
2283                         first_choice = current_selector
2284                         second_choice = _parse_format_selection(tokens, inside_choice=True)
2285                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2286                     elif string_ == '[':
2287                         if not current_selector:
2288                             current_selector = FormatSelector(SINGLE, 'best', [])
2289                         format_filter = _parse_filter(tokens)
2290                         current_selector.filters.append(format_filter)
2291                     elif string_ == '(':
2292                         if current_selector:
2293                             raise syntax_error('Unexpected "("', start)
2294                         group = _parse_format_selection(tokens, inside_group=True)
2295                         current_selector = FormatSelector(GROUP, group, [])
2296                     elif string_ == '+':
2297                         if not current_selector:
2298                             raise syntax_error('Unexpected "+"', start)
2299                         selector_1 = current_selector
2300                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2301                         if not selector_2:
2302                             raise syntax_error('Expected a selector', start)
2303                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2304                     else:
2305                         raise syntax_error(f'Operator not recognized: "{string_}"', start)
2306                 elif type_ == tokenize.ENDMARKER:
2307                     break
2308             if current_selector:
2309                 selectors.append(current_selector)
2310             return selectors
2311
2312         def _merge(formats_pair):
2313             format_1, format_2 = formats_pair
2314
2315             formats_info = []
2316             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2317             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2318
2319             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2320                 get_no_more = {'video': False, 'audio': False}
2321                 for (i, fmt_info) in enumerate(formats_info):
2322                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2323                         formats_info.pop(i)
2324                         continue
2325                     for aud_vid in ['audio', 'video']:
2326                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2327                             if get_no_more[aud_vid]:
2328                                 formats_info.pop(i)
2329                                 break
2330                             get_no_more[aud_vid] = True
2331
2332             if len(formats_info) == 1:
2333                 return formats_info[0]
2334
2335             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2336             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2337
2338             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2339             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2340
2341             output_ext = get_compatible_ext(
2342                 vcodecs=[f.get('vcodec') for f in video_fmts],
2343                 acodecs=[f.get('acodec') for f in audio_fmts],
2344                 vexts=[f['ext'] for f in video_fmts],
2345                 aexts=[f['ext'] for f in audio_fmts],
2346                 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2347                              or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2348
2349             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2350
2351             new_dict = {
2352                 'requested_formats': formats_info,
2353                 'format': '+'.join(filtered('format')),
2354                 'format_id': '+'.join(filtered('format_id')),
2355                 'ext': output_ext,
2356                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2357                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2358                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2359                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2360                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2361             }
2362
2363             if the_only_video:
2364                 new_dict.update({
2365                     'width': the_only_video.get('width'),
2366                     'height': the_only_video.get('height'),
2367                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2368                     'fps': the_only_video.get('fps'),
2369                     'dynamic_range': the_only_video.get('dynamic_range'),
2370                     'vcodec': the_only_video.get('vcodec'),
2371                     'vbr': the_only_video.get('vbr'),
2372                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2373                     'aspect_ratio': the_only_video.get('aspect_ratio'),
2374                 })
2375
2376             if the_only_audio:
2377                 new_dict.update({
2378                     'acodec': the_only_audio.get('acodec'),
2379                     'abr': the_only_audio.get('abr'),
2380                     'asr': the_only_audio.get('asr'),
2381                     'audio_channels': the_only_audio.get('audio_channels'),
2382                 })
2383
2384             return new_dict
2385
2386         def _check_formats(formats):
2387             if self.params.get('check_formats') == 'selected':
2388                 yield from self._check_formats(formats)
2389                 return
2390             elif (self.params.get('check_formats') is not None
2391                     or self.params.get('allow_unplayable_formats')):
2392                 yield from formats
2393                 return
2394
2395             for f in formats:
2396                 if f.get('has_drm') or f.get('__needs_testing'):
2397                     yield from self._check_formats([f])
2398                 else:
2399                     yield f
2400
2401         def _build_selector_function(selector):
2402             if isinstance(selector, list):  # ,
2403                 fs = [_build_selector_function(s) for s in selector]
2404
2405                 def selector_function(ctx):
2406                     for f in fs:
2407                         yield from f(ctx)
2408                 return selector_function
2409
2410             elif selector.type == GROUP:  # ()
2411                 selector_function = _build_selector_function(selector.selector)
2412
2413             elif selector.type == PICKFIRST:  # /
2414                 fs = [_build_selector_function(s) for s in selector.selector]
2415
2416                 def selector_function(ctx):
2417                     for f in fs:
2418                         picked_formats = list(f(ctx))
2419                         if picked_formats:
2420                             return picked_formats
2421                     return []
2422
2423             elif selector.type == MERGE:  # +
2424                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2425
2426                 def selector_function(ctx):
2427                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2428                         yield _merge(pair)
2429
2430             elif selector.type == SINGLE:  # atom
2431                 format_spec = selector.selector or 'best'
2432
2433                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2434                 if format_spec == 'all':
2435                     def selector_function(ctx):
2436                         yield from _check_formats(ctx['formats'][::-1])
2437                 elif format_spec == 'mergeall':
2438                     def selector_function(ctx):
2439                         formats = list(_check_formats(
2440                             f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2441                         if not formats:
2442                             return
2443                         merged_format = formats[-1]
2444                         for f in formats[-2::-1]:
2445                             merged_format = _merge((merged_format, f))
2446                         yield merged_format
2447
2448                 else:
2449                     format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
2450                     mobj = re.match(
2451                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2452                         format_spec)
2453                     if mobj is not None:
2454                         format_idx = int_or_none(mobj.group('n'), default=1)
2455                         format_reverse = mobj.group('bw')[0] == 'b'
2456                         format_type = (mobj.group('type') or [None])[0]
2457                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2458                         format_modified = mobj.group('mod') is not None
2459
2460                         format_fallback = not format_type and not format_modified  # for b, w
2461                         _filter_f = (
2462                             (lambda f: f.get(f'{format_type}codec') != 'none')
2463                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2464                             else (lambda f: f.get(f'{not_format_type}codec') == 'none')
2465                             if format_type  # bv, ba, wv, wa
2466                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2467                             if not format_modified  # b, w
2468                             else lambda f: True)  # b*, w*
2469                         filter_f = lambda f: _filter_f(f) and (
2470                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2471                     else:
2472                         if format_spec in self._format_selection_exts['audio']:
2473                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2474                         elif format_spec in self._format_selection_exts['video']:
2475                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2476                             seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2477                         elif format_spec in self._format_selection_exts['storyboards']:
2478                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2479                         else:
2480                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2481
2482                     def selector_function(ctx):
2483                         formats = list(ctx['formats'])
2484                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2485                         if not matches:
2486                             if format_fallback and ctx['incomplete_formats']:
2487                                 # for extractors with incomplete formats (audio only (soundcloud)
2488                                 # or video only (imgur)) best/worst will fallback to
2489                                 # best/worst {video,audio}-only format
2490                                 matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
2491                             elif seperate_fallback and not ctx['has_merged_format']:
2492                                 # for compatibility with youtube-dl when there is no pre-merged format
2493                                 matches = list(filter(seperate_fallback, formats))
2494                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2495                         try:
2496                             yield matches[format_idx - 1]
2497                         except LazyList.IndexError:
2498                             return
2499
2500             filters = [self._build_format_filter(f) for f in selector.filters]
2501
2502             def final_selector(ctx):
2503                 ctx_copy = dict(ctx)
2504                 for _filter in filters:
2505                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2506                 return selector_function(ctx_copy)
2507             return final_selector
2508
2509         # HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
2510         #       Prefix numbers with random letters to avoid it being classified as a number
2511         #       See: https://github.com/yt-dlp/yt-dlp/pulls/8797
2512         # TODO: Implement parser not reliant on tokenize.tokenize
2513         prefix = ''.join(random.choices(string.ascii_letters, k=32))
2514         stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
2515         try:
2516             tokens = list(_remove_unused_ops(
2517                 token._replace(string=token.string.replace(prefix, ''))
2518                 for token in tokenize.tokenize(stream.readline)))
2519         except tokenize.TokenError:
2520             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2521
2522         class TokenIterator:
2523             def __init__(self, tokens):
2524                 self.tokens = tokens
2525                 self.counter = 0
2526
2527             def __iter__(self):
2528                 return self
2529
2530             def __next__(self):
2531                 if self.counter >= len(self.tokens):
2532                     raise StopIteration
2533                 value = self.tokens[self.counter]
2534                 self.counter += 1
2535                 return value
2536
2537             next = __next__
2538
2539             def restore_last_token(self):
2540                 self.counter -= 1
2541
2542         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2543         return _build_selector_function(parsed_selector)
2544
2545     def _calc_headers(self, info_dict, load_cookies=False):
2546         res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
2547         clean_headers(res)
2548
2549         if load_cookies:  # For --load-info-json
2550             self._load_cookies(res.get('Cookie'), autoscope=info_dict['url'])  # compat
2551             self._load_cookies(info_dict.get('cookies'), autoscope=False)
2552         # The `Cookie` header is removed to prevent leaks and unscoped cookies.
2553         # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
2554         res.pop('Cookie', None)
2555         cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
2556         if cookies:
2557             encoder = LenientSimpleCookie()
2558             values = []
2559             for cookie in cookies:
2560                 _, value = encoder.value_encode(cookie.value)
2561                 values.append(f'{cookie.name}={value}')
2562                 if cookie.domain:
2563                     values.append(f'Domain={cookie.domain}')
2564                 if cookie.path:
2565                     values.append(f'Path={cookie.path}')
2566                 if cookie.secure:
2567                     values.append('Secure')
2568                 if cookie.expires:
2569                     values.append(f'Expires={cookie.expires}')
2570                 if cookie.version:
2571                     values.append(f'Version={cookie.version}')
2572             info_dict['cookies'] = '; '.join(values)
2573
2574         if 'X-Forwarded-For' not in res:
2575             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2576             if x_forwarded_for_ip:
2577                 res['X-Forwarded-For'] = x_forwarded_for_ip
2578
2579         return res
2580
2581     def _calc_cookies(self, url):
2582         self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2583         return self.cookiejar.get_cookie_header(url)
2584
2585     def _sort_thumbnails(self, thumbnails):
2586         thumbnails.sort(key=lambda t: (
2587             t.get('preference') if t.get('preference') is not None else -1,
2588             t.get('width') if t.get('width') is not None else -1,
2589             t.get('height') if t.get('height') is not None else -1,
2590             t.get('id') if t.get('id') is not None else '',
2591             t.get('url')))
2592
2593     def _sanitize_thumbnails(self, info_dict):
2594         thumbnails = info_dict.get('thumbnails')
2595         if thumbnails is None:
2596             thumbnail = info_dict.get('thumbnail')
2597             if thumbnail:
2598                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2599         if not thumbnails:
2600             return
2601
2602         def check_thumbnails(thumbnails):
2603             for t in thumbnails:
2604                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2605                 try:
2606                     self.urlopen(HEADRequest(t['url']))
2607                 except network_exceptions as err:
2608                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2609                     continue
2610                 yield t
2611
2612         self._sort_thumbnails(thumbnails)
2613         for i, t in enumerate(thumbnails):
2614             if t.get('id') is None:
2615                 t['id'] = str(i)
2616             if t.get('width') and t.get('height'):
2617                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2618             t['url'] = sanitize_url(t['url'])
2619
2620         if self.params.get('check_formats') is True:
2621             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2622         else:
2623             info_dict['thumbnails'] = thumbnails
2624
2625     def _fill_common_fields(self, info_dict, final=True):
2626         # TODO: move sanitization here
2627         if final:
2628             title = info_dict['fulltitle'] = info_dict.get('title')
2629             if not title:
2630                 if title == '':
2631                     self.write_debug('Extractor gave empty title. Creating a generic title')
2632                 else:
2633                     self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2634                 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2635
2636         if info_dict.get('duration') is not None:
2637             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2638
2639         for ts_key, date_key in (
2640                 ('timestamp', 'upload_date'),
2641                 ('release_timestamp', 'release_date'),
2642                 ('modified_timestamp', 'modified_date'),
2643         ):
2644             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2645                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2646                 # see http://bugs.python.org/issue1646728)
2647                 with contextlib.suppress(ValueError, OverflowError, OSError):
2648                     upload_date = dt.datetime.fromtimestamp(info_dict[ts_key], dt.timezone.utc)
2649                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2650
2651         if not info_dict.get('release_year'):
2652             info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])}))
2653
2654         live_keys = ('is_live', 'was_live')
2655         live_status = info_dict.get('live_status')
2656         if live_status is None:
2657             for key in live_keys:
2658                 if info_dict.get(key) is False:
2659                     continue
2660                 if info_dict.get(key):
2661                     live_status = key
2662                 break
2663             if all(info_dict.get(key) is False for key in live_keys):
2664                 live_status = 'not_live'
2665         if live_status:
2666             info_dict['live_status'] = live_status
2667             for key in live_keys:
2668                 if info_dict.get(key) is None:
2669                     info_dict[key] = (live_status == key)
2670         if live_status == 'post_live':
2671             info_dict['was_live'] = True
2672
2673         # Auto generate title fields corresponding to the *_number fields when missing
2674         # in order to always have clean titles. This is very common for TV series.
2675         for field in ('chapter', 'season', 'episode'):
2676             if final and info_dict.get(f'{field}_number') is not None and not info_dict.get(field):
2677                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict[f'{field}_number'])
2678
2679         for old_key, new_key in self._deprecated_multivalue_fields.items():
2680             if new_key in info_dict and old_key in info_dict:
2681                 if '_version' not in info_dict:  # HACK: Do not warn when using --load-info-json
2682                     self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
2683             elif old_value := info_dict.get(old_key):
2684                 info_dict[new_key] = old_value.split(', ')
2685             elif new_value := info_dict.get(new_key):
2686                 info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
2687
2688     def _raise_pending_errors(self, info):
2689         err = info.pop('__pending_error', None)
2690         if err:
2691             self.report_error(err, tb=False)
2692
2693     def sort_formats(self, info_dict):
2694         formats = self._get_formats(info_dict)
2695         formats.sort(key=FormatSorter(
2696             self, info_dict.get('_format_sort_fields') or []).calculate_preference)
2697
2698     def process_video_result(self, info_dict, download=True):
2699         assert info_dict.get('_type', 'video') == 'video'
2700         self._num_videos += 1
2701
2702         if 'id' not in info_dict:
2703             raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2704         elif not info_dict.get('id'):
2705             raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2706
2707         def report_force_conversion(field, field_not, conversion):
2708             self.report_warning(
2709                 f'"{field}" field is not {field_not} - forcing {conversion} conversion, '
2710                 'there is an error in extractor')
2711
2712         def sanitize_string_field(info, string_field):
2713             field = info.get(string_field)
2714             if field is None or isinstance(field, str):
2715                 return
2716             report_force_conversion(string_field, 'a string', 'string')
2717             info[string_field] = str(field)
2718
2719         def sanitize_numeric_fields(info):
2720             for numeric_field in self._NUMERIC_FIELDS:
2721                 field = info.get(numeric_field)
2722                 if field is None or isinstance(field, (int, float)):
2723                     continue
2724                 report_force_conversion(numeric_field, 'numeric', 'int')
2725                 info[numeric_field] = int_or_none(field)
2726
2727         sanitize_string_field(info_dict, 'id')
2728         sanitize_numeric_fields(info_dict)
2729         if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2730             info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2731         if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2732             self.report_warning('"duration" field is negative, there is an error in extractor')
2733
2734         chapters = info_dict.get('chapters') or []
2735         if chapters and chapters[0].get('start_time'):
2736             chapters.insert(0, {'start_time': 0})
2737
2738         dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2739         for idx, (prev, current, next_) in enumerate(zip(
2740                 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2741             if current.get('start_time') is None:
2742                 current['start_time'] = prev.get('end_time')
2743             if not current.get('end_time'):
2744                 current['end_time'] = next_.get('start_time')
2745             if not current.get('title'):
2746                 current['title'] = f'<Untitled Chapter {idx}>'
2747
2748         if 'playlist' not in info_dict:
2749             # It isn't part of a playlist
2750             info_dict['playlist'] = None
2751             info_dict['playlist_index'] = None
2752
2753         self._sanitize_thumbnails(info_dict)
2754
2755         thumbnail = info_dict.get('thumbnail')
2756         thumbnails = info_dict.get('thumbnails')
2757         if thumbnail:
2758             info_dict['thumbnail'] = sanitize_url(thumbnail)
2759         elif thumbnails:
2760             info_dict['thumbnail'] = thumbnails[-1]['url']
2761
2762         if info_dict.get('display_id') is None and 'id' in info_dict:
2763             info_dict['display_id'] = info_dict['id']
2764
2765         self._fill_common_fields(info_dict)
2766
2767         for cc_kind in ('subtitles', 'automatic_captions'):
2768             cc = info_dict.get(cc_kind)
2769             if cc:
2770                 for _, subtitle in cc.items():
2771                     for subtitle_format in subtitle:
2772                         if subtitle_format.get('url'):
2773                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2774                         if subtitle_format.get('ext') is None:
2775                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2776
2777         automatic_captions = info_dict.get('automatic_captions')
2778         subtitles = info_dict.get('subtitles')
2779
2780         info_dict['requested_subtitles'] = self.process_subtitles(
2781             info_dict['id'], subtitles, automatic_captions)
2782
2783         formats = self._get_formats(info_dict)
2784
2785         # Backward compatibility with InfoExtractor._sort_formats
2786         field_preference = (formats or [{}])[0].pop('__sort_fields', None)
2787         if field_preference:
2788             info_dict['_format_sort_fields'] = field_preference
2789
2790         info_dict['_has_drm'] = any(  # or None ensures --clean-infojson removes it
2791             f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
2792         if not self.params.get('allow_unplayable_formats'):
2793             formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
2794
2795         if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2796             self.report_warning(
2797                 f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2798                 'only images are available for download. Use --list-formats to see them'.capitalize())
2799
2800         get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2801         if not get_from_start:
2802             info_dict['title'] += ' ' + dt.datetime.now().strftime('%Y-%m-%d %H:%M')
2803         if info_dict.get('is_live') and formats:
2804             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2805             if get_from_start and not formats:
2806                 self.raise_no_formats(info_dict, msg=(
2807                     '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2808                     'If you want to download from the current time, use --no-live-from-start'))
2809
2810         def is_wellformed(f):
2811             url = f.get('url')
2812             if not url:
2813                 self.report_warning(
2814                     '"url" field is missing or empty - skipping format, '
2815                     'there is an error in extractor')
2816                 return False
2817             if isinstance(url, bytes):
2818                 sanitize_string_field(f, 'url')
2819             return True
2820
2821         # Filter out malformed formats for better extraction robustness
2822         formats = list(filter(is_wellformed, formats or []))
2823
2824         if not formats:
2825             self.raise_no_formats(info_dict)
2826
2827         for fmt in formats:
2828             sanitize_string_field(fmt, 'format_id')
2829             sanitize_numeric_fields(fmt)
2830             fmt['url'] = sanitize_url(fmt['url'])
2831             if fmt.get('ext') is None:
2832                 fmt['ext'] = determine_ext(fmt['url']).lower()
2833             if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
2834                 if fmt.get('acodec') is None:
2835                     fmt['acodec'] = fmt['ext']
2836             if fmt.get('protocol') is None:
2837                 fmt['protocol'] = determine_protocol(fmt)
2838             if fmt.get('resolution') is None:
2839                 fmt['resolution'] = self.format_resolution(fmt, default=None)
2840             if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none':
2841                 fmt['dynamic_range'] = 'SDR'
2842             if fmt.get('aspect_ratio') is None:
2843                 fmt['aspect_ratio'] = try_call(lambda: round(fmt['width'] / fmt['height'], 2))
2844             # For fragmented formats, "tbr" is often max bitrate and not average
2845             if (('manifest-filesize-approx' in self.params['compat_opts'] or not fmt.get('manifest_url'))
2846                     and not fmt.get('filesize') and not fmt.get('filesize_approx')):
2847                 fmt['filesize_approx'] = filesize_from_tbr(fmt.get('tbr'), info_dict.get('duration'))
2848             fmt['http_headers'] = self._calc_headers(collections.ChainMap(fmt, info_dict), load_cookies=True)
2849
2850         # Safeguard against old/insecure infojson when using --load-info-json
2851         if info_dict.get('http_headers'):
2852             info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
2853             info_dict['http_headers'].pop('Cookie', None)
2854
2855         # This is copied to http_headers by the above _calc_headers and can now be removed
2856         if '__x_forwarded_for_ip' in info_dict:
2857             del info_dict['__x_forwarded_for_ip']
2858
2859         self.sort_formats({
2860             'formats': formats,
2861             '_format_sort_fields': info_dict.get('_format_sort_fields'),
2862         })
2863
2864         # Sanitize and group by format_id
2865         formats_dict = {}
2866         for i, fmt in enumerate(formats):
2867             if not fmt.get('format_id'):
2868                 fmt['format_id'] = str(i)
2869             else:
2870                 # Sanitize format_id from characters used in format selector expression
2871                 fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
2872             formats_dict.setdefault(fmt['format_id'], []).append(fmt)
2873
2874         # Make sure all formats have unique format_id
2875         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2876         for format_id, ambiguous_formats in formats_dict.items():
2877             ambigious_id = len(ambiguous_formats) > 1
2878             for i, fmt in enumerate(ambiguous_formats):
2879                 if ambigious_id:
2880                     fmt['format_id'] = f'{format_id}-{i}'
2881                 # Ensure there is no conflict between id and ext in format selection
2882                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2883                 if fmt['format_id'] != fmt['ext'] and fmt['format_id'] in common_exts:
2884                     fmt['format_id'] = 'f{}'.format(fmt['format_id'])
2885
2886                 if fmt.get('format') is None:
2887                     fmt['format'] = '{id} - {res}{note}'.format(
2888                         id=fmt['format_id'],
2889                         res=self.format_resolution(fmt),
2890                         note=format_field(fmt, 'format_note', ' (%s)'),
2891                     )
2892
2893         if self.params.get('check_formats') is True:
2894             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2895
2896         if not formats or formats[0] is not info_dict:
2897             # only set the 'formats' fields if the original info_dict list them
2898             # otherwise we end up with a circular reference, the first (and unique)
2899             # element in the 'formats' field in info_dict is info_dict itself,
2900             # which can't be exported to json
2901             info_dict['formats'] = formats
2902
2903         info_dict, _ = self.pre_process(info_dict)
2904
2905         if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2906             return info_dict
2907
2908         self.post_extract(info_dict)
2909         info_dict, _ = self.pre_process(info_dict, 'after_filter')
2910
2911         # The pre-processors may have modified the formats
2912         formats = self._get_formats(info_dict)
2913
2914         list_only = self.params.get('simulate') == 'list_only'
2915         interactive_format_selection = not list_only and self.format_selector == '-'
2916         if self.params.get('list_thumbnails'):
2917             self.list_thumbnails(info_dict)
2918         if self.params.get('listsubtitles'):
2919             if 'automatic_captions' in info_dict:
2920                 self.list_subtitles(
2921                     info_dict['id'], automatic_captions, 'automatic captions')
2922             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2923         if self.params.get('listformats') or interactive_format_selection:
2924             self.list_formats(info_dict)
2925         if list_only:
2926             # Without this printing, -F --print-json will not work
2927             self.__forced_printings(info_dict)
2928             return info_dict
2929
2930         format_selector = self.format_selector
2931         while True:
2932             if interactive_format_selection:
2933                 req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
2934                                    + '(Press ENTER for default, or Ctrl+C to quit)'
2935                                    + self._format_screen(': ', self.Styles.EMPHASIS))
2936                 try:
2937                     format_selector = self.build_format_selector(req_format) if req_format else None
2938                 except SyntaxError as err:
2939                     self.report_error(err, tb=False, is_error=False)
2940                     continue
2941
2942             if format_selector is None:
2943                 req_format = self._default_format_spec(info_dict, download=download)
2944                 self.write_debug(f'Default format spec: {req_format}')
2945                 format_selector = self.build_format_selector(req_format)
2946
2947             formats_to_download = self._select_formats(formats, format_selector)
2948             if interactive_format_selection and not formats_to_download:
2949                 self.report_error('Requested format is not available', tb=False, is_error=False)
2950                 continue
2951             break
2952
2953         if not formats_to_download:
2954             if not self.params.get('ignore_no_formats_error'):
2955                 raise ExtractorError(
2956                     'Requested format is not available. Use --list-formats for a list of available formats',
2957                     expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2958             self.report_warning('Requested format is not available')
2959             # Process what we can, even without any available formats.
2960             formats_to_download = [{}]
2961
2962         requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
2963         best_format, downloaded_formats = formats_to_download[-1], []
2964         if download:
2965             if best_format and requested_ranges:
2966                 def to_screen(*msg):
2967                     self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2968
2969                 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2970                           (f['format_id'] for f in formats_to_download))
2971                 if requested_ranges != ({}, ):
2972                     to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2973                               (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
2974             max_downloads_reached = False
2975
2976             for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
2977                 new_info = self._copy_infodict(info_dict)
2978                 new_info.update(fmt)
2979                 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2980                 end_time = offset + min(chapter.get('end_time', duration), duration)
2981                 # duration may not be accurate. So allow deviations <1sec
2982                 if end_time == float('inf') or end_time > offset + duration + 1:
2983                     end_time = None
2984                 if chapter or offset:
2985                     new_info.update({
2986                         'section_start': offset + chapter.get('start_time', 0),
2987                         'section_end': end_time,
2988                         'section_title': chapter.get('title'),
2989                         'section_number': chapter.get('index'),
2990                     })
2991                 downloaded_formats.append(new_info)
2992                 try:
2993                     self.process_info(new_info)
2994                 except MaxDownloadsReached:
2995                     max_downloads_reached = True
2996                 self._raise_pending_errors(new_info)
2997                 # Remove copied info
2998                 for key, val in tuple(new_info.items()):
2999                     if info_dict.get(key) == val:
3000                         new_info.pop(key)
3001                 if max_downloads_reached:
3002                     break
3003
3004             write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
3005             assert write_archive.issubset({True, False, 'ignore'})
3006             if True in write_archive and False not in write_archive:
3007                 self.record_download_archive(info_dict)
3008
3009             info_dict['requested_downloads'] = downloaded_formats
3010             info_dict = self.run_all_pps('after_video', info_dict)
3011             if max_downloads_reached:
3012                 raise MaxDownloadsReached
3013
3014         # We update the info dict with the selected best quality format (backwards compatibility)
3015         info_dict.update(best_format)
3016         return info_dict
3017
3018     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
3019         """Select the requested subtitles and their format"""
3020         available_subs, normal_sub_langs = {}, []
3021         if normal_subtitles and self.params.get('writesubtitles'):
3022             available_subs.update(normal_subtitles)
3023             normal_sub_langs = tuple(normal_subtitles.keys())
3024         if automatic_captions and self.params.get('writeautomaticsub'):
3025             for lang, cap_info in automatic_captions.items():
3026                 if lang not in available_subs:
3027                     available_subs[lang] = cap_info
3028
3029         if not available_subs or (
3030                 not self.params.get('writesubtitles')
3031                 and not self.params.get('writeautomaticsub')):
3032             return None
3033
3034         all_sub_langs = tuple(available_subs.keys())
3035         if self.params.get('allsubtitles', False):
3036             requested_langs = all_sub_langs
3037         elif self.params.get('subtitleslangs', False):
3038             try:
3039                 requested_langs = orderedSet_from_options(
3040                     self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
3041             except re.error as e:
3042                 raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}')
3043         else:
3044             requested_langs = LazyList(itertools.chain(
3045                 ['en'] if 'en' in normal_sub_langs else [],
3046                 filter(lambda f: f.startswith('en'), normal_sub_langs),
3047                 ['en'] if 'en' in all_sub_langs else [],
3048                 filter(lambda f: f.startswith('en'), all_sub_langs),
3049                 normal_sub_langs, all_sub_langs,
3050             ))[:1]
3051         if requested_langs:
3052             self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
3053
3054         formats_query = self.params.get('subtitlesformat', 'best')
3055         formats_preference = formats_query.split('/') if formats_query else []
3056         subs = {}
3057         for lang in requested_langs:
3058             formats = available_subs.get(lang)
3059             if formats is None:
3060                 self.report_warning(f'{lang} subtitles not available for {video_id}')
3061                 continue
3062             for ext in formats_preference:
3063                 if ext == 'best':
3064                     f = formats[-1]
3065                     break
3066                 matches = list(filter(lambda f: f['ext'] == ext, formats))
3067                 if matches:
3068                     f = matches[-1]
3069                     break
3070             else:
3071                 f = formats[-1]
3072                 self.report_warning(
3073                     'No subtitle format found matching "{}" for language {}, '
3074                     'using {}. Use --list-subs for a list of available subtitles'.format(formats_query, lang, f['ext']))
3075             subs[lang] = f
3076         return subs
3077
3078     def _forceprint(self, key, info_dict):
3079         if info_dict is None:
3080             return
3081         info_copy = info_dict.copy()
3082         info_copy.setdefault('filename', self.prepare_filename(info_dict))
3083         if info_dict.get('requested_formats') is not None:
3084             # For RTMP URLs, also include the playpath
3085             info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
3086         elif info_dict.get('url'):
3087             info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
3088         info_copy['formats_table'] = self.render_formats_table(info_dict)
3089         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
3090         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
3091         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
3092
3093         def format_tmpl(tmpl):
3094             mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
3095             if not mobj:
3096                 return tmpl
3097
3098             fmt = '%({})s'
3099             if tmpl.startswith('{'):
3100                 tmpl, fmt = f'.{tmpl}', '%({})j'
3101             if tmpl.endswith('='):
3102                 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
3103             return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
3104
3105         for tmpl in self.params['forceprint'].get(key, []):
3106             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
3107
3108         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
3109             filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
3110             tmpl = format_tmpl(tmpl)
3111             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
3112             if self._ensure_dir_exists(filename):
3113                 with open(filename, 'a', encoding='utf-8', newline='') as f:
3114                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
3115
3116         return info_copy
3117
3118     def __forced_printings(self, info_dict, filename=None, incomplete=True):
3119         if (self.params.get('forcejson')
3120                 or self.params['forceprint'].get('video')
3121                 or self.params['print_to_file'].get('video')):
3122             self.post_extract(info_dict)
3123         if filename:
3124             info_dict['filename'] = filename
3125         info_copy = self._forceprint('video', info_dict)
3126
3127         def print_field(field, actual_field=None, optional=False):
3128             if actual_field is None:
3129                 actual_field = field
3130             if self.params.get(f'force{field}') and (
3131                     info_copy.get(field) is not None or (not optional and not incomplete)):
3132                 self.to_stdout(info_copy[actual_field])
3133
3134         print_field('title')
3135         print_field('id')
3136         print_field('url', 'urls')
3137         print_field('thumbnail', optional=True)
3138         print_field('description', optional=True)
3139         print_field('filename')
3140         if self.params.get('forceduration') and info_copy.get('duration') is not None:
3141             self.to_stdout(formatSeconds(info_copy['duration']))
3142         print_field('format')
3143
3144         if self.params.get('forcejson'):
3145             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
3146
3147     def dl(self, name, info, subtitle=False, test=False):
3148         if not info.get('url'):
3149             self.raise_no_formats(info, True)
3150
3151         if test:
3152             verbose = self.params.get('verbose')
3153             params = {
3154                 'test': True,
3155                 'quiet': self.params.get('quiet') or not verbose,
3156                 'verbose': verbose,
3157                 'noprogress': not verbose,
3158                 'nopart': True,
3159                 'skip_unavailable_fragments': False,
3160                 'keep_fragments': False,
3161                 'overwrites': True,
3162                 '_no_ytdl_file': True,
3163             }
3164         else:
3165             params = self.params
3166         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
3167         if not test:
3168             for ph in self._progress_hooks:
3169                 fd.add_progress_hook(ph)
3170             urls = '", "'.join(
3171                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
3172                 for f in info.get('requested_formats', []) or [info])
3173             self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
3174
3175         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
3176         # But it may contain objects that are not deep-copyable
3177         new_info = self._copy_infodict(info)
3178         if new_info.get('http_headers') is None:
3179             new_info['http_headers'] = self._calc_headers(new_info)
3180         return fd.download(name, new_info, subtitle)
3181
3182     def existing_file(self, filepaths, *, default_overwrite=True):
3183         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
3184         if existing_files and not self.params.get('overwrites', default_overwrite):
3185             return existing_files[0]
3186
3187         for file in existing_files:
3188             self.report_file_delete(file)
3189             os.remove(file)
3190         return None
3191
3192     def process_info(self, info_dict):
3193         """Process a single resolved IE result. (Modifies it in-place)"""
3194
3195         assert info_dict.get('_type', 'video') == 'video'
3196         original_infodict = info_dict
3197
3198         if 'format' not in info_dict and 'ext' in info_dict:
3199             info_dict['format'] = info_dict['ext']
3200
3201         if self._match_entry(info_dict) is not None:
3202             info_dict['__write_download_archive'] = 'ignore'
3203             return
3204
3205         # Does nothing under normal operation - for backward compatibility of process_info
3206         self.post_extract(info_dict)
3207
3208         def replace_info_dict(new_info):
3209             nonlocal info_dict
3210             if new_info == info_dict:
3211                 return
3212             info_dict.clear()
3213             info_dict.update(new_info)
3214
3215         new_info, _ = self.pre_process(info_dict, 'video')
3216         replace_info_dict(new_info)
3217         self._num_downloads += 1
3218
3219         # info_dict['_filename'] needs to be set for backward compatibility
3220         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3221         temp_filename = self.prepare_filename(info_dict, 'temp')
3222         files_to_move = {}
3223
3224         # Forced printings
3225         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
3226
3227         def check_max_downloads():
3228             if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3229                 raise MaxDownloadsReached
3230
3231         if self.params.get('simulate'):
3232             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3233             check_max_downloads()
3234             return
3235
3236         if full_filename is None:
3237             return
3238         if not self._ensure_dir_exists(encodeFilename(full_filename)):
3239             return
3240         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
3241             return
3242
3243         if self._write_description('video', info_dict,
3244                                    self.prepare_filename(info_dict, 'description')) is None:
3245             return
3246
3247         sub_files = self._write_subtitles(info_dict, temp_filename)
3248         if sub_files is None:
3249             return
3250         files_to_move.update(dict(sub_files))
3251
3252         thumb_files = self._write_thumbnails(
3253             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3254         if thumb_files is None:
3255             return
3256         files_to_move.update(dict(thumb_files))
3257
3258         infofn = self.prepare_filename(info_dict, 'infojson')
3259         _infojson_written = self._write_info_json('video', info_dict, infofn)
3260         if _infojson_written:
3261             info_dict['infojson_filename'] = infofn
3262             # For backward compatibility, even though it was a private field
3263             info_dict['__infojson_filename'] = infofn
3264         elif _infojson_written is None:
3265             return
3266
3267         # Note: Annotations are deprecated
3268         annofn = None
3269         if self.params.get('writeannotations', False):
3270             annofn = self.prepare_filename(info_dict, 'annotation')
3271         if annofn:
3272             if not self._ensure_dir_exists(encodeFilename(annofn)):
3273                 return
3274             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
3275                 self.to_screen('[info] Video annotations are already present')
3276             elif not info_dict.get('annotations'):
3277                 self.report_warning('There are no annotations to write.')
3278             else:
3279                 try:
3280                     self.to_screen('[info] Writing video annotations to: ' + annofn)
3281                     with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
3282                         annofile.write(info_dict['annotations'])
3283                 except (KeyError, TypeError):
3284                     self.report_warning('There are no annotations to write.')
3285                 except OSError:
3286                     self.report_error('Cannot write annotations file: ' + annofn)
3287                     return
3288
3289         # Write internet shortcut files
3290         def _write_link_file(link_type):
3291             url = try_get(info_dict['webpage_url'], iri_to_uri)
3292             if not url:
3293                 self.report_warning(
3294                     f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3295                 return True
3296             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3297             if not self._ensure_dir_exists(encodeFilename(linkfn)):
3298                 return False
3299             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3300                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3301                 return True
3302             try:
3303                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3304                 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3305                           newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3306                     template_vars = {'url': url}
3307                     if link_type == 'desktop':
3308                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3309                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3310             except OSError:
3311                 self.report_error(f'Cannot write internet shortcut {linkfn}')
3312                 return False
3313             return True
3314
3315         write_links = {
3316             'url': self.params.get('writeurllink'),
3317             'webloc': self.params.get('writewebloclink'),
3318             'desktop': self.params.get('writedesktoplink'),
3319         }
3320         if self.params.get('writelink'):
3321             link_type = ('webloc' if sys.platform == 'darwin'
3322                          else 'desktop' if sys.platform.startswith('linux')
3323                          else 'url')
3324             write_links[link_type] = True
3325
3326         if any(should_write and not _write_link_file(link_type)
3327                for link_type, should_write in write_links.items()):
3328             return
3329
3330         new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3331         replace_info_dict(new_info)
3332
3333         if self.params.get('skip_download'):
3334             info_dict['filepath'] = temp_filename
3335             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3336             info_dict['__files_to_move'] = files_to_move
3337             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3338             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3339         else:
3340             # Download
3341             info_dict.setdefault('__postprocessors', [])
3342             try:
3343
3344                 def existing_video_file(*filepaths):
3345                     ext = info_dict.get('ext')
3346                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3347                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3348                                               default_overwrite=False)
3349                     if file:
3350                         info_dict['ext'] = os.path.splitext(file)[1][1:]
3351                     return file
3352
3353                 fd, success = None, True
3354                 if info_dict.get('protocol') or info_dict.get('url'):
3355                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3356                     if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3357                             info_dict.get('section_start') or info_dict.get('section_end')):
3358                         msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3359                                else 'You have requested downloading the video partially, but ffmpeg is not installed')
3360                         self.report_error(f'{msg}. Aborting')
3361                         return
3362
3363                 if info_dict.get('requested_formats') is not None:
3364                     old_ext = info_dict['ext']
3365                     if self.params.get('merge_output_format') is None:
3366                         if (info_dict['ext'] == 'webm'
3367                                 and info_dict.get('thumbnails')
3368                                 # check with type instead of pp_key, __name__, or isinstance
3369                                 # since we dont want any custom PPs to trigger this
3370                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
3371                             info_dict['ext'] = 'mkv'
3372                             self.report_warning(
3373                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3374                     new_ext = info_dict['ext']
3375
3376                     def correct_ext(filename, ext=new_ext):
3377                         if filename == '-':
3378                             return filename
3379                         filename_real_ext = os.path.splitext(filename)[1][1:]
3380                         filename_wo_ext = (
3381                             os.path.splitext(filename)[0]
3382                             if filename_real_ext in (old_ext, new_ext)
3383                             else filename)
3384                         return f'{filename_wo_ext}.{ext}'
3385
3386                     # Ensure filename always has a correct extension for successful merge
3387                     full_filename = correct_ext(full_filename)
3388                     temp_filename = correct_ext(temp_filename)
3389                     dl_filename = existing_video_file(full_filename, temp_filename)
3390
3391                     info_dict['__real_download'] = False
3392                     # NOTE: Copy so that original format dicts are not modified
3393                     info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
3394
3395                     merger = FFmpegMergerPP(self)
3396                     downloaded = []
3397                     if dl_filename is not None:
3398                         self.report_file_already_downloaded(dl_filename)
3399                     elif fd:
3400                         for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
3401                             f['filepath'] = fname = prepend_extension(
3402                                 correct_ext(temp_filename, info_dict['ext']),
3403                                 'f{}'.format(f['format_id']), info_dict['ext'])
3404                             downloaded.append(fname)
3405                         info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
3406                         success, real_download = self.dl(temp_filename, info_dict)
3407                         info_dict['__real_download'] = real_download
3408                     else:
3409                         if self.params.get('allow_unplayable_formats'):
3410                             self.report_warning(
3411                                 'You have requested merging of multiple formats '
3412                                 'while also allowing unplayable formats to be downloaded. '
3413                                 'The formats won\'t be merged to prevent data corruption.')
3414                         elif not merger.available:
3415                             msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3416                             if not self.params.get('ignoreerrors'):
3417                                 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3418                                 return
3419                             self.report_warning(f'{msg}. The formats won\'t be merged')
3420
3421                         if temp_filename == '-':
3422                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3423                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3424                                       else 'but ffmpeg is not installed')
3425                             self.report_warning(
3426                                 f'You have requested downloading multiple formats to stdout {reason}. '
3427                                 'The formats will be streamed one after the other')
3428                             fname = temp_filename
3429                         for f in info_dict['requested_formats']:
3430                             new_info = dict(info_dict)
3431                             del new_info['requested_formats']
3432                             new_info.update(f)
3433                             if temp_filename != '-':
3434                                 fname = prepend_extension(
3435                                     correct_ext(temp_filename, new_info['ext']),
3436                                     'f{}'.format(f['format_id']), new_info['ext'])
3437                                 if not self._ensure_dir_exists(fname):
3438                                     return
3439                                 f['filepath'] = fname
3440                                 downloaded.append(fname)
3441                             partial_success, real_download = self.dl(fname, new_info)
3442                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3443                             success = success and partial_success
3444
3445                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3446                         info_dict['__postprocessors'].append(merger)
3447                         info_dict['__files_to_merge'] = downloaded
3448                         # Even if there were no downloads, it is being merged only now
3449                         info_dict['__real_download'] = True
3450                     else:
3451                         for file in downloaded:
3452                             files_to_move[file] = None
3453                 else:
3454                     # Just a single file
3455                     dl_filename = existing_video_file(full_filename, temp_filename)
3456                     if dl_filename is None or dl_filename == temp_filename:
3457                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3458                         # So we should try to resume the download
3459                         success, real_download = self.dl(temp_filename, info_dict)
3460                         info_dict['__real_download'] = real_download
3461                     else:
3462                         self.report_file_already_downloaded(dl_filename)
3463
3464                 dl_filename = dl_filename or temp_filename
3465                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3466
3467             except network_exceptions as err:
3468                 self.report_error(f'unable to download video data: {err}')
3469                 return
3470             except OSError as err:
3471                 raise UnavailableVideoError(err)
3472             except ContentTooShortError as err:
3473                 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3474                 return
3475
3476             self._raise_pending_errors(info_dict)
3477             if success and full_filename != '-':
3478
3479                 def fixup():
3480                     do_fixup = True
3481                     fixup_policy = self.params.get('fixup')
3482                     vid = info_dict['id']
3483
3484                     if fixup_policy in ('ignore', 'never'):
3485                         return
3486                     elif fixup_policy == 'warn':
3487                         do_fixup = 'warn'
3488                     elif fixup_policy != 'force':
3489                         assert fixup_policy in ('detect_or_warn', None)
3490                         if not info_dict.get('__real_download'):
3491                             do_fixup = False
3492
3493                     def ffmpeg_fixup(cndn, msg, cls):
3494                         if not (do_fixup and cndn):
3495                             return
3496                         elif do_fixup == 'warn':
3497                             self.report_warning(f'{vid}: {msg}')
3498                             return
3499                         pp = cls(self)
3500                         if pp.available:
3501                             info_dict['__postprocessors'].append(pp)
3502                         else:
3503                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3504
3505                     stretched_ratio = info_dict.get('stretched_ratio')
3506                     ffmpeg_fixup(stretched_ratio not in (1, None),
3507                                  f'Non-uniform pixel ratio {stretched_ratio}',
3508                                  FFmpegFixupStretchedPP)
3509
3510                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3511                     downloader = downloader.FD_NAME if downloader else None
3512
3513                     ext = info_dict.get('ext')
3514                     postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3515                         isinstance(pp, FFmpegVideoConvertorPP)
3516                         and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3517                     ) for pp in self._pps['post_process'])
3518
3519                     if not postprocessed_by_ffmpeg:
3520                         ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
3521                                      and info_dict.get('container') == 'm4a_dash',
3522                                      'writing DASH m4a. Only some players support this container',
3523                                      FFmpegFixupM4aPP)
3524                         ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3525                                      or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3526                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3527                                      FFmpegFixupM3u8PP)
3528                         ffmpeg_fixup(downloader == 'dashsegments'
3529                                      and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
3530                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3531
3532                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3533                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3534
3535                 fixup()
3536                 try:
3537                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3538                 except PostProcessingError as err:
3539                     self.report_error(f'Postprocessing: {err}')
3540                     return
3541                 try:
3542                     for ph in self._post_hooks:
3543                         ph(info_dict['filepath'])
3544                 except Exception as err:
3545                     self.report_error(f'post hooks: {err}')
3546                     return
3547                 info_dict['__write_download_archive'] = True
3548
3549         assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
3550         if self.params.get('force_write_download_archive'):
3551             info_dict['__write_download_archive'] = True
3552         check_max_downloads()
3553
3554     def __download_wrapper(self, func):
3555         @functools.wraps(func)
3556         def wrapper(*args, **kwargs):
3557             try:
3558                 res = func(*args, **kwargs)
3559             except UnavailableVideoError as e:
3560                 self.report_error(e)
3561             except DownloadCancelled as e:
3562                 self.to_screen(f'[info] {e}')
3563                 if not self.params.get('break_per_url'):
3564                     raise
3565                 self._num_downloads = 0
3566             else:
3567                 if self.params.get('dump_single_json', False):
3568                     self.post_extract(res)
3569                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3570         return wrapper
3571
3572     def download(self, url_list):
3573         """Download a given list of URLs."""
3574         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3575         outtmpl = self.params['outtmpl']['default']
3576         if (len(url_list) > 1
3577                 and outtmpl != '-'
3578                 and '%' not in outtmpl
3579                 and self.params.get('max_downloads') != 1):
3580             raise SameFileError(outtmpl)
3581
3582         for url in url_list:
3583             self.__download_wrapper(self.extract_info)(
3584                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3585
3586         return self._download_retcode
3587
3588     def download_with_info_file(self, info_filename):
3589         with contextlib.closing(fileinput.FileInput(
3590                 [info_filename], mode='r',
3591                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3592             # FileInput doesn't have a read method, we can't call json.load
3593             infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
3594                      for info in variadic(json.loads('\n'.join(f)))]
3595         for info in infos:
3596             try:
3597                 self.__download_wrapper(self.process_ie_result)(info, download=True)
3598             except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3599                 if not isinstance(e, EntryNotInPlaylist):
3600                     self.to_stderr('\r')
3601                 webpage_url = info.get('webpage_url')
3602                 if webpage_url is None:
3603                     raise
3604                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3605                 self.download([webpage_url])
3606             except ExtractorError as e:
3607                 self.report_error(e)
3608         return self._download_retcode
3609
3610     @staticmethod
3611     def sanitize_info(info_dict, remove_private_keys=False):
3612         """ Sanitize the infodict for converting to json """
3613         if info_dict is None:
3614             return info_dict
3615         info_dict.setdefault('epoch', int(time.time()))
3616         info_dict.setdefault('_type', 'video')
3617         info_dict.setdefault('_version', {
3618             'version': __version__,
3619             'current_git_head': current_git_head(),
3620             'release_git_head': RELEASE_GIT_HEAD,
3621             'repository': ORIGIN,
3622         })
3623
3624         if remove_private_keys:
3625             reject = lambda k, v: v is None or k.startswith('__') or k in {
3626                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3627                 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3628                 'playlist_autonumber',
3629             }
3630         else:
3631             reject = lambda k, v: False
3632
3633         def filter_fn(obj):
3634             if isinstance(obj, dict):
3635                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3636             elif isinstance(obj, (list, tuple, set, LazyList)):
3637                 return list(map(filter_fn, obj))
3638             elif obj is None or isinstance(obj, (str, int, float, bool)):
3639                 return obj
3640             else:
3641                 return repr(obj)
3642
3643         return filter_fn(info_dict)
3644
3645     @staticmethod
3646     def filter_requested_info(info_dict, actually_filter=True):
3647         """ Alias of sanitize_info for backward compatibility """
3648         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3649
3650     def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3651         for filename in set(filter(None, files_to_delete)):
3652             if msg:
3653                 self.to_screen(msg % filename)
3654             try:
3655                 os.remove(filename)
3656             except OSError:
3657                 self.report_warning(f'Unable to delete file {filename}')
3658             if filename in info.get('__files_to_move', []):  # NB: Delete even if None
3659                 del info['__files_to_move'][filename]
3660
3661     @staticmethod
3662     def post_extract(info_dict):
3663         def actual_post_extract(info_dict):
3664             if info_dict.get('_type') in ('playlist', 'multi_video'):
3665                 for video_dict in info_dict.get('entries', {}):
3666                     actual_post_extract(video_dict or {})
3667                 return
3668
3669             post_extractor = info_dict.pop('__post_extractor', None) or dict
3670             info_dict.update(post_extractor())
3671
3672         actual_post_extract(info_dict or {})
3673
3674     def run_pp(self, pp, infodict):
3675         files_to_delete = []
3676         if '__files_to_move' not in infodict:
3677             infodict['__files_to_move'] = {}
3678         try:
3679             files_to_delete, infodict = pp.run(infodict)
3680         except PostProcessingError as e:
3681             # Must be True and not 'only_download'
3682             if self.params.get('ignoreerrors') is True:
3683                 self.report_error(e)
3684                 return infodict
3685             raise
3686
3687         if not files_to_delete:
3688             return infodict
3689         if self.params.get('keepvideo', False):
3690             for f in files_to_delete:
3691                 infodict['__files_to_move'].setdefault(f, '')
3692         else:
3693             self._delete_downloaded_files(
3694                 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3695         return infodict
3696
3697     def run_all_pps(self, key, info, *, additional_pps=None):
3698         if key != 'video':
3699             self._forceprint(key, info)
3700         for pp in (additional_pps or []) + self._pps[key]:
3701             info = self.run_pp(pp, info)
3702         return info
3703
3704     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3705         info = dict(ie_info)
3706         info['__files_to_move'] = files_to_move or {}
3707         try:
3708             info = self.run_all_pps(key, info)
3709         except PostProcessingError as err:
3710             msg = f'Preprocessing: {err}'
3711             info.setdefault('__pending_error', msg)
3712             self.report_error(msg, is_error=False)
3713         return info, info.pop('__files_to_move', None)
3714
3715     def post_process(self, filename, info, files_to_move=None):
3716         """Run all the postprocessors on the given file."""
3717         info['filepath'] = filename
3718         info['__files_to_move'] = files_to_move or {}
3719         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3720         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3721         del info['__files_to_move']
3722         return self.run_all_pps('after_move', info)
3723
3724     def _make_archive_id(self, info_dict):
3725         video_id = info_dict.get('id')
3726         if not video_id:
3727             return
3728         # Future-proof against any change in case
3729         # and backwards compatibility with prior versions
3730         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3731         if extractor is None:
3732             url = str_or_none(info_dict.get('url'))
3733             if not url:
3734                 return
3735             # Try to find matching extractor for the URL and take its ie_key
3736             for ie_key, ie in self._ies.items():
3737                 if ie.suitable(url):
3738                     extractor = ie_key
3739                     break
3740             else:
3741                 return
3742         return make_archive_id(extractor, video_id)
3743
3744     def in_download_archive(self, info_dict):
3745         if not self.archive:
3746             return False
3747
3748         vid_ids = [self._make_archive_id(info_dict)]
3749         vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3750         return any(id_ in self.archive for id_ in vid_ids)
3751
3752     def record_download_archive(self, info_dict):
3753         fn = self.params.get('download_archive')
3754         if fn is None:
3755             return
3756         vid_id = self._make_archive_id(info_dict)
3757         assert vid_id
3758
3759         self.write_debug(f'Adding to archive: {vid_id}')
3760         if is_path_like(fn):
3761             with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3762                 archive_file.write(vid_id + '\n')
3763         self.archive.add(vid_id)
3764
3765     @staticmethod
3766     def format_resolution(format, default='unknown'):
3767         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3768             return 'audio only'
3769         if format.get('resolution') is not None:
3770             return format['resolution']
3771         if format.get('width') and format.get('height'):
3772             return '%dx%d' % (format['width'], format['height'])
3773         elif format.get('height'):
3774             return '{}p'.format(format['height'])
3775         elif format.get('width'):
3776             return '%dx?' % format['width']
3777         return default
3778
3779     def _list_format_headers(self, *headers):
3780         if self.params.get('listformats_table', True) is not False:
3781             return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3782         return headers
3783
3784     def _format_note(self, fdict):
3785         res = ''
3786         if fdict.get('ext') in ['f4f', 'f4m']:
3787             res += '(unsupported)'
3788         if fdict.get('language'):
3789             if res:
3790                 res += ' '
3791             res += '[{}]'.format(fdict['language'])
3792         if fdict.get('format_note') is not None:
3793             if res:
3794                 res += ' '
3795             res += fdict['format_note']
3796         if fdict.get('tbr') is not None:
3797             if res:
3798                 res += ', '
3799             res += '%4dk' % fdict['tbr']
3800         if fdict.get('container') is not None:
3801             if res:
3802                 res += ', '
3803             res += '{} container'.format(fdict['container'])
3804         if (fdict.get('vcodec') is not None
3805                 and fdict.get('vcodec') != 'none'):
3806             if res:
3807                 res += ', '
3808             res += fdict['vcodec']
3809             if fdict.get('vbr') is not None:
3810                 res += '@'
3811         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3812             res += 'video@'
3813         if fdict.get('vbr') is not None:
3814             res += '%4dk' % fdict['vbr']
3815         if fdict.get('fps') is not None:
3816             if res:
3817                 res += ', '
3818             res += '{}fps'.format(fdict['fps'])
3819         if fdict.get('acodec') is not None:
3820             if res:
3821                 res += ', '
3822             if fdict['acodec'] == 'none':
3823                 res += 'video only'
3824             else:
3825                 res += '%-5s' % fdict['acodec']
3826         elif fdict.get('abr') is not None:
3827             if res:
3828                 res += ', '
3829             res += 'audio'
3830         if fdict.get('abr') is not None:
3831             res += '@%3dk' % fdict['abr']
3832         if fdict.get('asr') is not None:
3833             res += ' (%5dHz)' % fdict['asr']
3834         if fdict.get('filesize') is not None:
3835             if res:
3836                 res += ', '
3837             res += format_bytes(fdict['filesize'])
3838         elif fdict.get('filesize_approx') is not None:
3839             if res:
3840                 res += ', '
3841             res += '~' + format_bytes(fdict['filesize_approx'])
3842         return res
3843
3844     def _get_formats(self, info_dict):
3845         if info_dict.get('formats') is None:
3846             if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3847                 return [info_dict]
3848             return []
3849         return info_dict['formats']
3850
3851     def render_formats_table(self, info_dict):
3852         formats = self._get_formats(info_dict)
3853         if not formats:
3854             return
3855         if not self.params.get('listformats_table', True) is not False:
3856             table = [
3857                 [
3858                     format_field(f, 'format_id'),
3859                     format_field(f, 'ext'),
3860                     self.format_resolution(f),
3861                     self._format_note(f),
3862                 ] for f in formats if (f.get('preference') or 0) >= -1000]
3863             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3864
3865         def simplified_codec(f, field):
3866             assert field in ('acodec', 'vcodec')
3867             codec = f.get(field)
3868             if not codec:
3869                 return 'unknown'
3870             elif codec != 'none':
3871                 return '.'.join(codec.split('.')[:4])
3872
3873             if field == 'vcodec' and f.get('acodec') == 'none':
3874                 return 'images'
3875             elif field == 'acodec' and f.get('vcodec') == 'none':
3876                 return ''
3877             return self._format_out('audio only' if field == 'vcodec' else 'video only',
3878                                     self.Styles.SUPPRESS)
3879
3880         delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3881         table = [
3882             [
3883                 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3884                 format_field(f, 'ext'),
3885                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3886                 format_field(f, 'fps', '\t%d', func=round),
3887                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3888                 format_field(f, 'audio_channels', '\t%s'),
3889                 delim, (
3890                     format_field(f, 'filesize', ' \t%s', func=format_bytes)
3891                     or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
3892                     or format_field(filesize_from_tbr(f.get('tbr'), info_dict.get('duration')), None,
3893                                     self._format_out('~\t%s', self.Styles.SUPPRESS), func=format_bytes)),
3894                 format_field(f, 'tbr', '\t%dk', func=round),
3895                 shorten_protocol_name(f.get('protocol', '')),
3896                 delim,
3897                 simplified_codec(f, 'vcodec'),
3898                 format_field(f, 'vbr', '\t%dk', func=round),
3899                 simplified_codec(f, 'acodec'),
3900                 format_field(f, 'abr', '\t%dk', func=round),
3901                 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3902                 join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
3903                     self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
3904                     (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
3905                      else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
3906                     format_field(f, 'format_note'),
3907                     format_field(f, 'container', ignore=(None, f.get('ext'))),
3908                     delim=', '), delim=' '),
3909             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3910         header_line = self._list_format_headers(
3911             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3912             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3913
3914         return render_table(
3915             header_line, table, hide_empty=True,
3916             delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3917
3918     def render_thumbnails_table(self, info_dict):
3919         thumbnails = list(info_dict.get('thumbnails') or [])
3920         if not thumbnails:
3921             return None
3922         return render_table(
3923             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3924             [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3925
3926     def render_subtitles_table(self, video_id, subtitles):
3927         def _row(lang, formats):
3928             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3929             if len(set(names)) == 1:
3930                 names = [] if names[0] == 'unknown' else names[:1]
3931             return [lang, ', '.join(names), ', '.join(exts)]
3932
3933         if not subtitles:
3934             return None
3935         return render_table(
3936             self._list_format_headers('Language', 'Name', 'Formats'),
3937             [_row(lang, formats) for lang, formats in subtitles.items()],
3938             hide_empty=True)
3939
3940     def __list_table(self, video_id, name, func, *args):
3941         table = func(*args)
3942         if not table:
3943             self.to_screen(f'{video_id} has no {name}')
3944             return
3945         self.to_screen(f'[info] Available {name} for {video_id}:')
3946         self.to_stdout(table)
3947
3948     def list_formats(self, info_dict):
3949         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3950
3951     def list_thumbnails(self, info_dict):
3952         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3953
3954     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3955         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3956
3957     def print_debug_header(self):
3958         if not self.params.get('verbose'):
3959             return
3960
3961         from . import _IN_CLI  # Must be delayed import
3962
3963         # These imports can be slow. So import them only as needed
3964         from .extractor.extractors import _LAZY_LOADER
3965         from .extractor.extractors import (
3966             _PLUGIN_CLASSES as plugin_ies,
3967             _PLUGIN_OVERRIDES as plugin_ie_overrides,
3968         )
3969
3970         def get_encoding(stream):
3971             ret = str(getattr(stream, 'encoding', f'missing ({type(stream).__name__})'))
3972             additional_info = []
3973             if os.environ.get('TERM', '').lower() == 'dumb':
3974                 additional_info.append('dumb')
3975             if not supports_terminal_sequences(stream):
3976                 from .utils import WINDOWS_VT_MODE  # Must be imported locally
3977                 additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
3978             if additional_info:
3979                 ret = f'{ret} ({",".join(additional_info)})'
3980             return ret
3981
3982         encoding_str = 'Encodings: locale {}, fs {}, pref {}, {}'.format(
3983             locale.getpreferredencoding(),
3984             sys.getfilesystemencoding(),
3985             self.get_encoding(),
3986             ', '.join(
3987                 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3988                 if stream is not None and key != 'console'),
3989         )
3990
3991         logger = self.params.get('logger')
3992         if logger:
3993             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3994             write_debug(encoding_str)
3995         else:
3996             write_string(f'[debug] {encoding_str}\n', encoding=None)
3997             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3998
3999         source = detect_variant()
4000         if VARIANT not in (None, 'pip'):
4001             source += '*'
4002         klass = type(self)
4003         write_debug(join_nonempty(
4004             f'{REPOSITORY.rpartition("/")[2]} version',
4005             _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
4006             f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
4007             '' if source == 'unknown' else f'({source})',
4008             '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
4009             delim=' '))
4010
4011         if not _IN_CLI:
4012             write_debug(f'params: {self.params}')
4013
4014         if not _LAZY_LOADER:
4015             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
4016                 write_debug('Lazy loading extractors is forcibly disabled')
4017             else:
4018                 write_debug('Lazy loading extractors is disabled')
4019         if self.params['compat_opts']:
4020             write_debug('Compatibility options: {}'.format(', '.join(self.params['compat_opts'])))
4021
4022         if current_git_head():
4023             write_debug(f'Git HEAD: {current_git_head()}')
4024         write_debug(system_identifier())
4025
4026         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
4027         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
4028         if ffmpeg_features:
4029             exe_versions['ffmpeg'] += ' ({})'.format(','.join(sorted(ffmpeg_features)))
4030
4031         exe_versions['rtmpdump'] = rtmpdump_version()
4032         exe_versions['phantomjs'] = PhantomJSwrapper._version()
4033         exe_str = ', '.join(
4034             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
4035         ) or 'none'
4036         write_debug(f'exe versions: {exe_str}')
4037
4038         from .compat.compat_utils import get_package_info
4039         from .dependencies import available_dependencies
4040
4041         write_debug('Optional libraries: %s' % (', '.join(sorted({
4042             join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
4043         })) or 'none'))
4044
4045         write_debug(f'Proxy map: {self.proxies}')
4046         write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
4047         for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
4048             display_list = ['{}{}'.format(
4049                 klass.__name__, '' if klass.__name__ == name else f' as {name}')
4050                 for name, klass in plugins.items()]
4051             if plugin_type == 'Extractor':
4052                 display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
4053                                     for parent, plugins in plugin_ie_overrides.items())
4054             if not display_list:
4055                 continue
4056             write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
4057
4058         plugin_dirs = plugin_directories()
4059         if plugin_dirs:
4060             write_debug(f'Plugin directories: {plugin_dirs}')
4061
4062         # Not implemented
4063         if False and self.params.get('call_home'):
4064             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
4065             write_debug(f'Public IP address: {ipaddr}')
4066             latest_version = self.urlopen(
4067                 'https://yt-dl.org/latest/version').read().decode()
4068             if version_tuple(latest_version) > version_tuple(__version__):
4069                 self.report_warning(
4070                     f'You are using an outdated version (newest version: {latest_version})! '
4071                     'See https://yt-dl.org/update if you need help updating.')
4072
4073     @functools.cached_property
4074     def proxies(self):
4075         """Global proxy configuration"""
4076         opts_proxy = self.params.get('proxy')
4077         if opts_proxy is not None:
4078             if opts_proxy == '':
4079                 opts_proxy = '__noproxy__'
4080             proxies = {'all': opts_proxy}
4081         else:
4082             proxies = urllib.request.getproxies()
4083             # compat. Set HTTPS_PROXY to __noproxy__ to revert
4084             if 'http' in proxies and 'https' not in proxies:
4085                 proxies['https'] = proxies['http']
4086
4087         return proxies
4088
4089     @functools.cached_property
4090     def cookiejar(self):
4091         """Global cookiejar instance"""
4092         return load_cookies(
4093             self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
4094
4095     @property
4096     def _opener(self):
4097         """
4098         Get a urllib OpenerDirector from the Urllib handler (deprecated).
4099         """
4100         self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
4101         handler = self._request_director.handlers['Urllib']
4102         return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
4103
4104     def _get_available_impersonate_targets(self):
4105         # TODO(future): make available as public API
4106         return [
4107             (target, rh.RH_NAME)
4108             for rh in self._request_director.handlers.values()
4109             if isinstance(rh, ImpersonateRequestHandler)
4110             for target in rh.supported_targets
4111         ]
4112
4113     def _impersonate_target_available(self, target):
4114         # TODO(future): make available as public API
4115         return any(
4116             rh.is_supported_target(target)
4117             for rh in self._request_director.handlers.values()
4118             if isinstance(rh, ImpersonateRequestHandler))
4119
4120     def urlopen(self, req):
4121         """ Start an HTTP download """
4122         if isinstance(req, str):
4123             req = Request(req)
4124         elif isinstance(req, urllib.request.Request):
4125             self.deprecation_warning(
4126                 'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
4127                 'Use yt_dlp.networking.common.Request instead.')
4128             req = urllib_req_to_req(req)
4129         assert isinstance(req, Request)
4130
4131         # compat: Assume user:pass url params are basic auth
4132         url, basic_auth_header = extract_basic_auth(req.url)
4133         if basic_auth_header:
4134             req.headers['Authorization'] = basic_auth_header
4135         req.url = sanitize_url(url)
4136
4137         clean_proxies(proxies=req.proxies, headers=req.headers)
4138         clean_headers(req.headers)
4139
4140         try:
4141             return self._request_director.send(req)
4142         except NoSupportingHandlers as e:
4143             for ue in e.unsupported_errors:
4144                 # FIXME: This depends on the order of errors.
4145                 if not (ue.handler and ue.msg):
4146                     continue
4147                 if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
4148                     raise RequestError(
4149                         'file:// URLs are disabled by default in yt-dlp for security reasons. '
4150                         'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
4151                 if (
4152                     'unsupported proxy type: "https"' in ue.msg.lower()
4153                     and 'requests' not in self._request_director.handlers
4154                     and 'curl_cffi' not in self._request_director.handlers
4155                 ):
4156                     raise RequestError(
4157                         'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests, curl_cffi')
4158
4159                 elif (
4160                     re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
4161                     and 'websockets' not in self._request_director.handlers
4162                 ):
4163                     raise RequestError(
4164                         'This request requires WebSocket support. '
4165                         'Ensure one of the following dependencies are installed: websockets',
4166                         cause=ue) from ue
4167
4168                 elif re.match(r'unsupported (?:extensions: impersonate|impersonate target)', ue.msg.lower()):
4169                     raise RequestError(
4170                         f'Impersonate target "{req.extensions["impersonate"]}" is not available.'
4171                         f' See --list-impersonate-targets for available targets.'
4172                         f' This request requires browser impersonation, however you may be missing dependencies'
4173                         f' required to support this target.')
4174             raise
4175         except SSLError as e:
4176             if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
4177                 raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
4178             elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
4179                 raise RequestError(
4180                     'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
4181                     'Try using --legacy-server-connect', cause=e) from e
4182             raise
4183
4184     def build_request_director(self, handlers, preferences=None):
4185         logger = _YDLLogger(self)
4186         headers = self.params['http_headers'].copy()
4187         proxies = self.proxies.copy()
4188         clean_headers(headers)
4189         clean_proxies(proxies, headers)
4190
4191         director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
4192         for handler in handlers:
4193             director.add_handler(handler(
4194                 logger=logger,
4195                 headers=headers,
4196                 cookiejar=self.cookiejar,
4197                 proxies=proxies,
4198                 prefer_system_certs='no-certifi' in self.params['compat_opts'],
4199                 verify=not self.params.get('nocheckcertificate'),
4200                 **traverse_obj(self.params, {
4201                     'verbose': 'debug_printtraffic',
4202                     'source_address': 'source_address',
4203                     'timeout': 'socket_timeout',
4204                     'legacy_ssl_support': 'legacyserverconnect',
4205                     'enable_file_urls': 'enable_file_urls',
4206                     'impersonate': 'impersonate',
4207                     'client_cert': {
4208                         'client_certificate': 'client_certificate',
4209                         'client_certificate_key': 'client_certificate_key',
4210                         'client_certificate_password': 'client_certificate_password',
4211                     },
4212                 }),
4213             ))
4214         director.preferences.update(preferences or [])
4215         if 'prefer-legacy-http-handler' in self.params['compat_opts']:
4216             director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
4217         return director
4218
4219     @functools.cached_property
4220     def _request_director(self):
4221         return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
4222
4223     def encode(self, s):
4224         if isinstance(s, bytes):
4225             return s  # Already encoded
4226
4227         try:
4228             return s.encode(self.get_encoding())
4229         except UnicodeEncodeError as err:
4230             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
4231             raise
4232
4233     def get_encoding(self):
4234         encoding = self.params.get('encoding')
4235         if encoding is None:
4236             encoding = preferredencoding()
4237         return encoding
4238
4239     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
4240         """ Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error """
4241         if overwrite is None:
4242             overwrite = self.params.get('overwrites', True)
4243         if not self.params.get('writeinfojson'):
4244             return False
4245         elif not infofn:
4246             self.write_debug(f'Skipping writing {label} infojson')
4247             return False
4248         elif not self._ensure_dir_exists(infofn):
4249             return None
4250         elif not overwrite and os.path.exists(infofn):
4251             self.to_screen(f'[info] {label.title()} metadata is already present')
4252             return 'exists'
4253
4254         self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
4255         try:
4256             write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
4257             return True
4258         except OSError:
4259             self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
4260             return None
4261
4262     def _write_description(self, label, ie_result, descfn):
4263         """ Write description and returns True = written, False = skip, None = error """
4264         if not self.params.get('writedescription'):
4265             return False
4266         elif not descfn:
4267             self.write_debug(f'Skipping writing {label} description')
4268             return False
4269         elif not self._ensure_dir_exists(descfn):
4270             return None
4271         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
4272             self.to_screen(f'[info] {label.title()} description is already present')
4273         elif ie_result.get('description') is None:
4274             self.to_screen(f'[info] There\'s no {label} description to write')
4275             return False
4276         else:
4277             try:
4278                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
4279                 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
4280                     descfile.write(ie_result['description'])
4281             except OSError:
4282                 self.report_error(f'Cannot write {label} description file {descfn}')
4283                 return None
4284         return True
4285
4286     def _write_subtitles(self, info_dict, filename):
4287         """ Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error"""
4288         ret = []
4289         subtitles = info_dict.get('requested_subtitles')
4290         if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
4291             # subtitles download errors are already managed as troubles in relevant IE
4292             # that way it will silently go on when used with unsupporting IE
4293             return ret
4294         elif not subtitles:
4295             self.to_screen('[info] There are no subtitles for the requested languages')
4296             return ret
4297         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
4298         if not sub_filename_base:
4299             self.to_screen('[info] Skipping writing video subtitles')
4300             return ret
4301
4302         for sub_lang, sub_info in subtitles.items():
4303             sub_format = sub_info['ext']
4304             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
4305             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
4306             existing_sub = self.existing_file((sub_filename_final, sub_filename))
4307             if existing_sub:
4308                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
4309                 sub_info['filepath'] = existing_sub
4310                 ret.append((existing_sub, sub_filename_final))
4311                 continue
4312
4313             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
4314             if sub_info.get('data') is not None:
4315                 try:
4316                     # Use newline='' to prevent conversion of newline characters
4317                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
4318                     with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
4319                         subfile.write(sub_info['data'])
4320                     sub_info['filepath'] = sub_filename
4321                     ret.append((sub_filename, sub_filename_final))
4322                     continue
4323                 except OSError:
4324                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
4325                     return None
4326
4327             try:
4328                 sub_copy = sub_info.copy()
4329                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
4330                 self.dl(sub_filename, sub_copy, subtitle=True)
4331                 sub_info['filepath'] = sub_filename
4332                 ret.append((sub_filename, sub_filename_final))
4333             except (DownloadError, ExtractorError, OSError, ValueError, *network_exceptions) as err:
4334                 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
4335                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
4336                     if not self.params.get('ignoreerrors'):
4337                         self.report_error(msg)
4338                     raise DownloadError(msg)
4339                 self.report_warning(msg)
4340         return ret
4341
4342     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
4343         """ Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error """
4344         write_all = self.params.get('write_all_thumbnails', False)
4345         thumbnails, ret = [], []
4346         if write_all or self.params.get('writethumbnail', False):
4347             thumbnails = info_dict.get('thumbnails') or []
4348             if not thumbnails:
4349                 self.to_screen(f'[info] There are no {label} thumbnails to download')
4350                 return ret
4351         multiple = write_all and len(thumbnails) > 1
4352
4353         if thumb_filename_base is None:
4354             thumb_filename_base = filename
4355         if thumbnails and not thumb_filename_base:
4356             self.write_debug(f'Skipping writing {label} thumbnail')
4357             return ret
4358
4359         if thumbnails and not self._ensure_dir_exists(filename):
4360             return None
4361
4362         for idx, t in list(enumerate(thumbnails))[::-1]:
4363             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
4364             thumb_display_id = f'{label} thumbnail {t["id"]}'
4365             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4366             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
4367
4368             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4369             if existing_thumb:
4370                 self.to_screen('[info] {} is already present'.format((
4371                     thumb_display_id if multiple else f'{label} thumbnail').capitalize()))
4372                 t['filepath'] = existing_thumb
4373                 ret.append((existing_thumb, thumb_filename_final))
4374             else:
4375                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
4376                 try:
4377                     uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
4378                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
4379                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
4380                         shutil.copyfileobj(uf, thumbf)
4381                     ret.append((thumb_filename, thumb_filename_final))
4382                     t['filepath'] = thumb_filename
4383                 except network_exceptions as err:
4384                     if isinstance(err, HTTPError) and err.status == 404:
4385                         self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
4386                     else:
4387                         self.report_warning(f'Unable to download {thumb_display_id}: {err}')
4388                     thumbnails.pop(idx)
4389             if ret and not write_all:
4390                 break
4391         return ret