import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import http.cookiejar
import io
import itertools
import json
import locale
import operator
import os
import random
import re
import shutil
import string
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata

from .cache import Cache
from .compat import functools, urllib  # isort: split
from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector
from .networking.common import _REQUEST_HANDLERS
from .networking.exceptions import (
    HTTPError,
    NoSupportingHandlers,
    RequestError,
    SSLError,
    _CompatHTTPError,
    network_exceptions,
)
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, current_git_head, detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    IDENTITY,
    LINK_TEMPLATES,
    MEDIA_EXTENSIONS,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    FormatSorter,
    GeoRestrictedError,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    UserNotLive,
    age_restricted,
    args_to_str,
    bug_reports_message,
    date_from_str,
    deprecation_warning,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    escapeHTML,
    expand_path,
    extract_basic_auth,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_compatible_ext,
    get_domain,
    int_or_none,
    iri_to_uri,
    is_path_like,
    join_nonempty,
    locked_file,
    make_archive_id,
    make_dir,
    number_of_digits,
    orderedSet,
    orderedSet_from_options,
    parse_filesize,
    preferredencoding,
    prepend_extension,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    system_identifier,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_call,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .utils._utils import _YDLLogger
from .utils.networking import (
    HTTPHeaderDict,
    clean_headers,
    clean_proxies,
    std_headers,
)
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".
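
    A minimal usage sketch (the URL and options are illustrative):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'outtmpl': '%(title)s.%(ext)s'}) as ydl:
            ydl.download(['https://example.com/some/video'])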

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
    netrc_cmd:         Use a shell command to get credentials
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be Windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Print everything to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove internal metadata from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson, etc.
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A utils.DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  A set, or the name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL rather than on the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input URL is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
                       * False: Always process. Default for API
                       * True: Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                         from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                         playlists (not multi_video). Default for CLI
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                         the entries of utils.POSTPROCESS_WHEN
                         Assumed to be 'post_process' if not given
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                         Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                         None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                         unknown
                       * fragment_index: The counter of the currently
                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                         Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval:Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       - Raise utils.DownloadCancelled(msg) to abort remaining
                         downloads when a video is rejected.
                       match_filter_func in utils.py is one example of this;
                       see also the sketch after this options list.
    color:             A dictionary with output stream names as keys
                       and their respective color policy as values.
                       Can also just be a single color policy,
                       in which case it applies to all outputs.
                       Valid stream names are 'stdout' and 'stderr'.
                       Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress:        Do not print the progress bar
    live_from_start:   Whether to download livestreams from the start

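    An illustrative sketch combining several of the options above (the hook,
    the filter and all values are examples, not prescriptions):

        def my_hook(d):
            # Check 'status' first and ignore unknown values
            if d['status'] == 'finished':
                print('Done downloading', d['filename'])

        def my_filter(info_dict, *, incomplete):
            # Return a message to skip the video, or None to download it
            if (info_dict.get('duration') or 0) > 3600:
                return 'Skipping videos longer than an hour'

        params = {
            'format': 'bestvideo+bestaudio/best',
            'outtmpl': {'default': '%(title)s.%(ext)s'},
            'progress_hooks': [my_hook],
            'match_filter': my_filter,
            'postprocessors': [{'key': 'FFmpegMetadata'}],
        }
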
    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used
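
    For example, extra arguments can be passed to a single executable
    (the values below are illustrative):

        params = {'postprocessor_args': {'ffmpeg': ['-threads', '1']}}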

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors (default: 3)
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
                       - `raise DownloadCancelled(msg)` in match_filter instead
    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    no_color:          Same as `color='no_color'`
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }

    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )

        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.params.setdefault('_warnings', []).append(
                    'Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'

        def process_color_policy(stream):
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
            if policy in ('auto', None):
                return term_allow_color and supports_terminal_sequences(stream)
            assert policy in ('always', 'never', 'no_color')
            return {'always': True, 'never': False}.get(policy, policy)

        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'
        })

        # The code is left like this to be reused for future deprecations
        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = ('Support for Python version %d.%d has been deprecated. '
                   'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
                   '\n You will no longer receive updates on this version')
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            self.deprecated_feature(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
        self.__header_cookies = []
        self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
        self.params['http_headers'].pop('Cookie', None)

        self._request_director = self.build_request_director(
            sorted(_REQUEST_HANDLERS.values(), key=lambda rh: rh.RH_NAME.lower()))
        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies list; if there's no instance, it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractor_classes to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
        try:
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
                }, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def save_cookies(self):
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()

    def __exit__(self, *args):
        self.restore_console_title()
        self.close()

    def close(self):
        self.save_cookies()
        self._request_director.close()

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        BAD_FORMAT='light red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefix the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join(random.choices(string.ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err
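
    # For instance (illustrative), YoutubeDL.validate_outtmpl('%(title)s.%(ext)s')
    # returns None, while a malformed template returns the ValueError describing it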

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int, slice or "{field, ...}"
        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
            'inner': FIELD_INNER_RE,
            'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
        }
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')
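        # Putting the groups together, a full template field therefore looks
        # like the following (illustrative):
        #   %(field>strf_format,alternate&replacement|default)s
        # e.g. '%(upload_date>%Y-%m-%d|unknown)s'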

        def _traverse_infodict(fields):
            fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                      for f in ([x] if x.startswith('{') else x.split('.'))]
            for i in (0, -1):
                if fields and not fields[i]:
                    fields.pop(i)

            for i, f in enumerate(fields):
                if not f.startswith('{'):
                    continue
                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
                fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

            return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
            if sanitize and value == '':
                value = None
            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        class _ReplacementFormatter(string.Formatter):
            def get_field(self, field_name, args, kwargs):
                if field_name.isdigit():
                    return args[0], -1
                raise ValueError('Unsupported field')

        replacement_formatter = _ReplacementFormatter()

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            value, replacement, default, last_field = None, None, na, ''
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                last_field, replacement = mobj['fields'], mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            if None not in (value, replacement):
                try:
                    value = replacement_formatter.format(replacement, value)
                except ValueError:
                    value, default = None, na

            fmt = outer_mobj.group('format')
            if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
                fmt = f'0{field_size_compat_map[last_field]:d}d'

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if value is None:
                value, fmt = default, 's'
            elif fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(
                    value, default=_dumpjson_default,
                    indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
            elif fmt[-1] == 'h':  # html
                value, fmt = escapeHTML(str(value)), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rsa':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                if fmt[-1] == 'r':
                    value, fmt = repr(value), str_fmt
                elif fmt[-1] == 'a':
                    value, fmt = ascii(value), str_fmt
                if fmt[-1] in 'csra':
                    value = sanitizer(last_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

1370 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1371 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1372 return self.escape_outtmpl(outtmpl) % info_dict
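# Illustrative annotation (not upstream code): a hedged sketch of expanding an
# output template via `evaluate_outtmpl`. The info dict below is made up for
# the example, not a real extractor result.
#
#   ydl = YoutubeDL()
#   ydl.evaluate_outtmpl(
#       '%(title).20s [%(id)s].%(ext)s',
#       {'title': 'An Example Video', 'id': 'abc123', 'ext': 'mp4'})
#   # -> 'An Example Video [abc123].mp4'
#   ydl.evaluate_outtmpl('%(tags)l', {'tags': ['music', 'live']})
#   # -> 'music, live' (the 'l' conversion joins list fields)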
1373
1374 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1375 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1376 if outtmpl is None:
1377 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1378 try:
1379 outtmpl = self._outtmpl_expandpath(outtmpl)
1380 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1381 if not filename:
1382 return None
1383
1384 if tmpl_type in ('', 'temp'):
1385 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1386 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1387 filename = replace_extension(filename, ext, final_ext)
1388 elif tmpl_type:
1389 force_ext = OUTTMPL_TYPES[tmpl_type]
1390 if force_ext:
1391 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1392
1393 # https://github.com/blackjack4494/youtube-dlc/issues/85
1394 trim_file_name = self.params.get('trim_file_name', False)
1395 if trim_file_name:
1396 no_ext, *ext = filename.rsplit('.', 2)
1397 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1398
1399 return filename
1400 except ValueError as err:
1401 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1402 return None
1403
1404 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1405 """Generate the output filename"""
1406 if outtmpl:
1407 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1408 dir_type = None
1409 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1410 if not filename and dir_type not in ('', 'temp'):
1411 return ''
1412
1413 if warn:
1414 if not self.params.get('paths'):
1415 pass
1416 elif filename == '-':
1417 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
1418 elif os.path.isabs(filename):
1419 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1420 if filename == '-' or not filename:
1421 return filename
1422
1423 return self.get_output_path(dir_type, filename)
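# Worked example (assumed values): with the default template
# '%(title)s [%(id)s].%(ext)s', an info dict such as
# {'id': 'abc123', 'title': 'An Example Video', 'ext': 'mp4'} yields
# 'An Example Video [abc123].mp4'. Passing dir_type='infojson' instead forces
# the 'info.json' extension via OUTTMPL_TYPES, and the result is finally
# routed through get_output_path() so that --paths is honoured.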
1424
1425 def _match_entry(self, info_dict, incomplete=False, silent=False):
1426 """Returns None if the file should be downloaded"""
1427 _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
1428 assert incomplete or _type == 'video', 'Only video result can be considered complete'
1429
1430 video_title = info_dict.get('title', info_dict.get('id', 'entry'))
1431
1432 def check_filter():
1433 if _type in ('playlist', 'multi_video'):
1434 return
1435 elif _type in ('url', 'url_transparent') and not try_call(
1436 lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1437 return
1438
1439 if 'title' in info_dict:
1440 # This can happen when we're just evaluating the playlist
1441 title = info_dict['title']
1442 matchtitle = self.params.get('matchtitle', False)
1443 if matchtitle:
1444 if not re.search(matchtitle, title, re.IGNORECASE):
1445 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1446 rejecttitle = self.params.get('rejecttitle', False)
1447 if rejecttitle:
1448 if re.search(rejecttitle, title, re.IGNORECASE):
1449 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1450
1451 date = info_dict.get('upload_date')
1452 if date is not None:
1453 dateRange = self.params.get('daterange', DateRange())
1454 if date not in dateRange:
1455 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1456 view_count = info_dict.get('view_count')
1457 if view_count is not None:
1458 min_views = self.params.get('min_views')
1459 if min_views is not None and view_count < min_views:
1460 return 'Skipping %s because it has not reached the minimum view count (%d/%d)' % (video_title, view_count, min_views)
1461 max_views = self.params.get('max_views')
1462 if max_views is not None and view_count > max_views:
1463 return 'Skipping %s because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1464 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1465 return 'Skipping "%s" because it is age restricted' % video_title
1466
1467 match_filter = self.params.get('match_filter')
1468 if match_filter is None:
1469 return None
1470
1471 cancelled = None
1472 try:
1473 try:
1474 ret = match_filter(info_dict, incomplete=incomplete)
1475 except TypeError:
1476 # For backward compatibility
1477 ret = None if incomplete else match_filter(info_dict)
1478 except DownloadCancelled as err:
1479 if err.msg is not NO_DEFAULT:
1480 raise
1481 ret, cancelled = err.msg, err
1482
1483 if ret is NO_DEFAULT:
1484 while True:
1485 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1486 reply = input(self._format_screen(
1487 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1488 if reply in {'y', ''}:
1489 return None
1490 elif reply == 'n':
1491 if cancelled:
1492 raise type(cancelled)(f'Skipping {video_title}')
1493 return f'Skipping {video_title}'
1494 return ret
1495
1496 if self.in_download_archive(info_dict):
1497 reason = ''.join((
1498 format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
1499 format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
1500 'has already been recorded in the archive'))
1501 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1502 else:
1503 try:
1504 reason = check_filter()
1505 except DownloadCancelled as e:
1506 reason, break_opt, break_err = e.msg, 'match_filter', type(e)
1507 else:
1508 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1509 if reason is not None:
1510 if not silent:
1511 self.to_screen('[download] ' + reason)
1512 if self.params.get(break_opt, False):
1513 raise break_err()
1514 return reason
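# A hedged sketch of a custom 'match_filter' callable (hypothetical): a
# returned string skips the entry with that reason, None lets the download
# proceed, and NO_DEFAULT triggers the interactive prompt handled above.
#
#   def skip_long_videos(info, *, incomplete=False):
#       if (info.get('duration') or 0) > 600:
#           return 'duration is longer than 10 minutes'
#       return None
#
#   ydl = YoutubeDL({'match_filter': skip_long_videos})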
1515
1516 @staticmethod
1517 def add_extra_info(info_dict, extra_info):
1518 '''Set the keys from extra_info in info dict if they are missing'''
1519 for key, value in extra_info.items():
1520 info_dict.setdefault(key, value)
1521
1522 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1523 process=True, force_generic_extractor=False):
1524 """
1525 Extract and return the information dictionary of the URL
1526
1527 Arguments:
1528 @param url URL to extract
1529
1530 Keyword arguments:
1531 @param download Whether to download videos
1532 @param process Whether to resolve all unresolved references (URLs, playlist items).
1533 Must be True for download to work
1534 @param ie_key Use only the extractor with this key
1535
1536 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1537 @param force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
1538 """
1539
1540 if extra_info is None:
1541 extra_info = {}
1542
1543 if not ie_key and force_generic_extractor:
1544 ie_key = 'Generic'
1545
1546 if ie_key:
1547 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1548 else:
1549 ies = self._ies
1550
1551 for key, ie in ies.items():
1552 if not ie.suitable(url):
1553 continue
1554
1555 if not ie.working():
1556 self.report_warning('The program functionality for this site has been marked as broken, '
1557 'and will probably not work.')
1558
1559 temp_id = ie.get_temp_id(url)
1560 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1561 self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
1562 'has already been recorded in the archive')
1563 if self.params.get('break_on_existing', False):
1564 raise ExistingVideoReached()
1565 break
1566 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1567 else:
1568 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1569 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1570 tb=False if extractors_restricted else None)
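# Typical usage (illustrative URL, not a real endpoint):
#
#   with YoutubeDL({'quiet': True}) as ydl:
#       info = ydl.extract_info('https://example.com/video', download=False)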
1571
1572 def _handle_extraction_exceptions(func):
1573 @functools.wraps(func)
1574 def wrapper(self, *args, **kwargs):
1575 while True:
1576 try:
1577 return func(self, *args, **kwargs)
1578 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1579 raise
1580 except ReExtractInfo as e:
1581 if e.expected:
1582 self.to_screen(f'{e}; Re-extracting data')
1583 else:
1584 self.to_stderr('\r')
1585 self.report_warning(f'{e}; Re-extracting data')
1586 continue
1587 except GeoRestrictedError as e:
1588 msg = e.msg
1589 if e.countries:
1590 msg += '\nThis video is available in %s.' % ', '.join(
1591 map(ISO3166Utils.short2full, e.countries))
1592 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1593 self.report_error(msg)
1594 except ExtractorError as e: # An error we somewhat expected
1595 self.report_error(str(e), e.format_traceback())
1596 except Exception as e:
1597 if self.params.get('ignoreerrors'):
1598 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1599 else:
1600 raise
1601 break
1602 return wrapper
1603
1604 def _wait_for_video(self, ie_result={}):
1605 if (not self.params.get('wait_for_video')
1606 or ie_result.get('_type', 'video') != 'video'
1607 or ie_result.get('formats') or ie_result.get('url')):
1608 return
1609
1610 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1611 last_msg = ''
1612
1613 def progress(msg):
1614 nonlocal last_msg
1615 full_msg = f'{msg}\n'
1616 if not self.params.get('noprogress'):
1617 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1618 elif last_msg:
1619 return
1620 self.to_screen(full_msg, skip_eol=True)
1621 last_msg = msg
1622
1623 min_wait, max_wait = self.params.get('wait_for_video')
1624 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1625 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1626 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1627 self.report_warning('Release time of video is not known')
1628 elif ie_result and (diff or 0) <= 0:
1629 self.report_warning('Video should already be available according to extracted info')
1630 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1631 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1632
1633 wait_till = time.time() + diff
1634 try:
1635 while True:
1636 diff = wait_till - time.time()
1637 if diff <= 0:
1638 progress('')
1639 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1640 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1641 time.sleep(1)
1642 except KeyboardInterrupt:
1643 progress('')
1644 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1645 except BaseException as e:
1646 if not isinstance(e, ReExtractInfo):
1647 self.to_screen('')
1648 raise
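# Note: 'wait_for_video' is a (min_wait, max_wait) tuple of seconds, e.g.
# YoutubeDL({'wait_for_video': (60, 600)}). A known release timestamp takes
# precedence; the random wait within min/max is only a fallback for upcoming
# lives whose release time is not known.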
1649
1650 def _load_cookies(self, data, *, autoscope=True):
1651 """Loads cookies from a `Cookie` header
1652
1653 This tries to work around the security vulnerability of passing cookies to every domain.
1654 See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1655
1656 @param data The Cookie header as string to load the cookies from
1657 @param autoscope If `False`, scope cookies using Set-Cookie syntax and raise an error for cookies without a domain
1658 If `True`, save cookies for later to be stored in the jar with a limited scope
1659 If a URL, save cookies in the jar with the domain of the URL
1660 """
1661 for cookie in LenientSimpleCookie(data).values():
1662 if autoscope and any(cookie.values()):
1663 raise ValueError('Invalid syntax in Cookie Header')
1664
1665 domain = cookie.get('domain') or ''
1666 expiry = cookie.get('expires')
1667 if expiry == '': # 0 is valid
1668 expiry = None
1669 prepared_cookie = http.cookiejar.Cookie(
1670 cookie.get('version') or 0, cookie.key, cookie.value, None, False,
1671 domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
1672 cookie.get('secure') or False, expiry, False, None, None, {})
1673
1674 if domain:
1675 self.cookiejar.set_cookie(prepared_cookie)
1676 elif autoscope is True:
1677 self.deprecated_feature(
1678 'Passing cookies as a header is a potential security risk; '
1679 'they will be scoped to the domain of the downloaded URLs. '
1680 'Please consider loading cookies from a file or browser instead.')
1681 self.__header_cookies.append(prepared_cookie)
1682 elif autoscope:
1683 self.report_warning(
1684 'The extractor result contains an unscoped cookie as an HTTP header. '
1685 f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
1686 only_once=True)
1687 self._apply_header_cookies(autoscope, [prepared_cookie])
1688 else:
1689 self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1690 tb=False, is_error=False)
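# Hedged sketch of the two scoping modes of this internal helper (cookie
# values are hypothetical):
#
#   # explicitly scoped, Set-Cookie style attributes (autoscope=False):
#   ydl._load_cookies('sid=abc; Domain=.example.com; Path=/; Secure',
#                     autoscope=False)
#   # bare header cookie, scoped later to each downloaded URL (autoscope=True):
#   ydl._load_cookies('sid=abc', autoscope=True)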
1691
1692 def _apply_header_cookies(self, url, cookies=None):
1693 """Applies stray header cookies to the provided url
1694
1695 This loads header cookies and scopes them to the domain provided in `url`.
1696 While this is not ideal, it helps reduce the risk of them being sent
1697 to an unintended destination while mostly maintaining compatibility.
1698 """
1699 parsed = urllib.parse.urlparse(url)
1700 if not parsed.hostname:
1701 return
1702
1703 for cookie in map(copy.copy, cookies or self.__header_cookies):
1704 cookie.domain = f'.{parsed.hostname}'
1705 self.cookiejar.set_cookie(cookie)
1706
1707 @_handle_extraction_exceptions
1708 def __extract_info(self, url, ie, download, extra_info, process):
1709 self._apply_header_cookies(url)
1710
1711 try:
1712 ie_result = ie.extract(url)
1713 except UserNotLive as e:
1714 if process:
1715 if self.params.get('wait_for_video'):
1716 self.report_warning(e)
1717 self._wait_for_video()
1718 raise
1719 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1720 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1721 return
1722 if isinstance(ie_result, list):
1723 # Backwards compatibility: old IE result format
1724 ie_result = {
1725 '_type': 'compat_list',
1726 'entries': ie_result,
1727 }
1728 if extra_info.get('original_url'):
1729 ie_result.setdefault('original_url', extra_info['original_url'])
1730 self.add_default_extra_info(ie_result, ie, url)
1731 if process:
1732 self._wait_for_video(ie_result)
1733 return self.process_ie_result(ie_result, download, extra_info)
1734 else:
1735 return ie_result
1736
1737 def add_default_extra_info(self, ie_result, ie, url):
1738 if url is not None:
1739 self.add_extra_info(ie_result, {
1740 'webpage_url': url,
1741 'original_url': url,
1742 })
1743 webpage_url = ie_result.get('webpage_url')
1744 if webpage_url:
1745 self.add_extra_info(ie_result, {
1746 'webpage_url_basename': url_basename(webpage_url),
1747 'webpage_url_domain': get_domain(webpage_url),
1748 })
1749 if ie is not None:
1750 self.add_extra_info(ie_result, {
1751 'extractor': ie.IE_NAME,
1752 'extractor_key': ie.ie_key(),
1753 })
1754
1755 def process_ie_result(self, ie_result, download=True, extra_info=None):
1756 """
1757 Take the result of the ie (may be modified) and resolve all unresolved
1758 references (URLs, playlist items).
1759
1760 It will also download the videos if 'download' is true.
1761 Returns the resolved ie_result.
1762 """
1763 if extra_info is None:
1764 extra_info = {}
1765 result_type = ie_result.get('_type', 'video')
1766
1767 if result_type in ('url', 'url_transparent'):
1768 ie_result['url'] = sanitize_url(
1769 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1770 if ie_result.get('original_url') and not extra_info.get('original_url'):
1771 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1772
1773 extract_flat = self.params.get('extract_flat', False)
1774 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1775 or extract_flat is True):
1776 info_copy = ie_result.copy()
1777 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1778 if ie and not ie_result.get('id'):
1779 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1780 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1781 self.add_extra_info(info_copy, extra_info)
1782 info_copy, _ = self.pre_process(info_copy)
1783 self._fill_common_fields(info_copy, False)
1784 self.__forced_printings(info_copy)
1785 self._raise_pending_errors(info_copy)
1786 if self.params.get('force_write_download_archive', False):
1787 self.record_download_archive(info_copy)
1788 return ie_result
1789
1790 if result_type == 'video':
1791 self.add_extra_info(ie_result, extra_info)
1792 ie_result = self.process_video_result(ie_result, download=download)
1793 self._raise_pending_errors(ie_result)
1794 additional_urls = (ie_result or {}).get('additional_urls')
1795 if additional_urls:
1796 # TODO: Improve MetadataParserPP to allow setting a list
1797 if isinstance(additional_urls, str):
1798 additional_urls = [additional_urls]
1799 self.to_screen(
1800 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1801 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1802 ie_result['additional_entries'] = [
1803 self.extract_info(
1804 url, download, extra_info=extra_info,
1805 force_generic_extractor=self.params.get('force_generic_extractor'))
1806 for url in additional_urls
1807 ]
1808 return ie_result
1809 elif result_type == 'url':
1810 # We have to add extra_info to the results because it may be
1811 # contained in a playlist
1812 return self.extract_info(
1813 ie_result['url'], download,
1814 ie_key=ie_result.get('ie_key'),
1815 extra_info=extra_info)
1816 elif result_type == 'url_transparent':
1817 # Use the information from the embedding page
1818 info = self.extract_info(
1819 ie_result['url'], ie_key=ie_result.get('ie_key'),
1820 extra_info=extra_info, download=False, process=False)
1821
1822 # extract_info may return None when ignoreerrors is enabled and
1823 # extraction failed with an error, don't crash and return early
1824 # in this case
1825 if not info:
1826 return info
1827
1828 exempted_fields = {'_type', 'url', 'ie_key'}
1829 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1830 # For video clips, the id etc of the clip extractor should be used
1831 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1832
1833 new_result = info.copy()
1834 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1835
1836 # Extracted info may not be a video result (i.e.
1837 # info.get('_type', 'video') != video) but rather an url or
1838 # url_transparent. In such cases outer metadata (from ie_result)
1839 # should be propagated to inner one (info). For this to happen
1840 # _type of info should be overridden with url_transparent. This
1841 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1842 if new_result.get('_type') == 'url':
1843 new_result['_type'] = 'url_transparent'
1844
1845 return self.process_ie_result(
1846 new_result, download=download, extra_info=extra_info)
1847 elif result_type in ('playlist', 'multi_video'):
1848 # Protect from infinite recursion due to recursively nested playlists
1849 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1850 webpage_url = ie_result.get('webpage_url') # Playlists may not have a webpage_url
1851 if webpage_url and webpage_url in self._playlist_urls:
1852 self.to_screen(
1853 '[download] Skipping already downloaded playlist: %s'
1854 % (ie_result.get('title') or ie_result.get('id')))
1855 return
1856
1857 self._playlist_level += 1
1858 self._playlist_urls.add(webpage_url)
1859 self._fill_common_fields(ie_result, False)
1860 self._sanitize_thumbnails(ie_result)
1861 try:
1862 return self.__process_playlist(ie_result, download)
1863 finally:
1864 self._playlist_level -= 1
1865 if not self._playlist_level:
1866 self._playlist_urls.clear()
1867 elif result_type == 'compat_list':
1868 self.report_warning(
1869 'Extractor %s returned a compat_list result. '
1870 'It needs to be updated.' % ie_result.get('extractor'))
1871
1872 def _fixup(r):
1873 self.add_extra_info(r, {
1874 'extractor': ie_result['extractor'],
1875 'webpage_url': ie_result['webpage_url'],
1876 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1877 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1878 'extractor_key': ie_result['extractor_key'],
1879 })
1880 return r
1881 ie_result['entries'] = [
1882 self.process_ie_result(_fixup(r), download, extra_info)
1883 for r in ie_result['entries']
1884 ]
1885 return ie_result
1886 else:
1887 raise Exception('Invalid result type: %s' % result_type)
1888
1889 def _ensure_dir_exists(self, path):
1890 return make_dir(path, self.report_error)
1891
1892 @staticmethod
1893 def _playlist_infodict(ie_result, strict=False, **kwargs):
1894 info = {
1895 'playlist_count': ie_result.get('playlist_count'),
1896 'playlist': ie_result.get('title') or ie_result.get('id'),
1897 'playlist_id': ie_result.get('id'),
1898 'playlist_title': ie_result.get('title'),
1899 'playlist_uploader': ie_result.get('uploader'),
1900 'playlist_uploader_id': ie_result.get('uploader_id'),
1901 **kwargs,
1902 }
1903 if strict:
1904 return info
1905 if ie_result.get('webpage_url'):
1906 info.update({
1907 'webpage_url': ie_result['webpage_url'],
1908 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1909 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1910 })
1911 return {
1912 **info,
1913 'playlist_index': 0,
1914 '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1915 'extractor': ie_result['extractor'],
1916 'extractor_key': ie_result['extractor_key'],
1917 }
1918
1919 def __process_playlist(self, ie_result, download):
1920 """Process each entry in the playlist"""
1921 assert ie_result['_type'] in ('playlist', 'multi_video')
1922
1923 common_info = self._playlist_infodict(ie_result, strict=True)
1924 title = common_info.get('playlist') or '<Untitled>'
1925 if self._match_entry(common_info, incomplete=True) is not None:
1926 return
1927 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
1928
1929 all_entries = PlaylistEntries(self, ie_result)
1930 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1931
1932 lazy = self.params.get('lazy_playlist')
1933 if lazy:
1934 resolved_entries, n_entries = [], 'N/A'
1935 ie_result['requested_entries'], ie_result['entries'] = None, None
1936 else:
1937 entries = resolved_entries = list(entries)
1938 n_entries = len(resolved_entries)
1939 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1940 if not ie_result.get('playlist_count'):
1941 # Better to do this after potentially exhausting entries
1942 ie_result['playlist_count'] = all_entries.get_full_count()
1943
1944 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1945 ie_copy = collections.ChainMap(ie_result, extra)
1946
1947 _infojson_written = False
1948 write_playlist_files = self.params.get('allow_playlist_files', True)
1949 if write_playlist_files and self.params.get('list_thumbnails'):
1950 self.list_thumbnails(ie_result)
1951 if write_playlist_files and not self.params.get('simulate'):
1952 _infojson_written = self._write_info_json(
1953 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1954 if _infojson_written is None:
1955 return
1956 if self._write_description('playlist', ie_result,
1957 self.prepare_filename(ie_copy, 'pl_description')) is None:
1958 return
1959 # TODO: This should be passed to ThumbnailsConvertor if necessary
1960 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1961
1962 if lazy:
1963 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1964 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1965 elif self.params.get('playlistreverse'):
1966 entries.reverse()
1967 elif self.params.get('playlistrandom'):
1968 random.shuffle(entries)
1969
1970 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
1971 f'{format_field(ie_result, "playlist_count", " of %s")}')
1972
1973 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1974 if self.params.get('extract_flat') == 'discard_in_playlist':
1975 keep_resolved_entries = ie_result['_type'] != 'playlist'
1976 if keep_resolved_entries:
1977 self.write_debug('The information of all playlist entries will be held in memory')
1978
1979 failures = 0
1980 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1981 for i, (playlist_index, entry) in enumerate(entries):
1982 if lazy:
1983 resolved_entries.append((playlist_index, entry))
1984 if not entry:
1985 continue
1986
1987 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
1988 if not lazy and 'playlist-index' in self.params['compat_opts']:
1989 playlist_index = ie_result['requested_entries'][i]
1990
1991 entry_copy = collections.ChainMap(entry, {
1992 **common_info,
1993 'n_entries': int_or_none(n_entries),
1994 'playlist_index': playlist_index,
1995 'playlist_autonumber': i + 1,
1996 })
1997
1998 if self._match_entry(entry_copy, incomplete=True) is not None:
1999 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
2000 resolved_entries[i] = (playlist_index, NO_DEFAULT)
2001 continue
2002
2003 self.to_screen('[download] Downloading item %s of %s' % (
2004 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
2005
2006 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
2007 'playlist_index': playlist_index,
2008 'playlist_autonumber': i + 1,
2009 }, extra))
2010 if not entry_result:
2011 failures += 1
2012 if failures >= max_failures:
2013 self.report_error(
2014 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
2015 break
2016 if keep_resolved_entries:
2017 resolved_entries[i] = (playlist_index, entry_result)
2018
2019 # Update with processed data
2020 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
2021 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
2022 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
2023 # Do not set for full playlist
2024 ie_result.pop('requested_entries')
2025
2026 # Write the updated info to json
2027 if _infojson_written is True and self._write_info_json(
2028 'updated playlist', ie_result,
2029 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
2030 return
2031
2032 ie_result = self.run_all_pps('playlist', ie_result)
2033 self.to_screen(f'[download] Finished downloading playlist: {title}')
2034 return ie_result
2035
2036 @_handle_extraction_exceptions
2037 def __process_iterable_entry(self, entry, download, extra_info):
2038 return self.process_ie_result(
2039 entry, download=download, extra_info=extra_info)
2040
2041 def _build_format_filter(self, filter_spec):
2042 " Returns a function to filter the formats according to the filter_spec "
2043
2044 OPERATORS = {
2045 '<': operator.lt,
2046 '<=': operator.le,
2047 '>': operator.gt,
2048 '>=': operator.ge,
2049 '=': operator.eq,
2050 '!=': operator.ne,
2051 }
2052 operator_rex = re.compile(r'''(?x)\s*
2053 (?P<key>[\w.-]+)\s*
2054 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
2055 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2056 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
2057 m = operator_rex.fullmatch(filter_spec)
2058 if m:
2059 try:
2060 comparison_value = int(m.group('value'))
2061 except ValueError:
2062 comparison_value = parse_filesize(m.group('value'))
2063 if comparison_value is None:
2064 comparison_value = parse_filesize(m.group('value') + 'B')
2065 if comparison_value is None:
2066 raise ValueError(
2067 'Invalid value %r in format specification %r' % (
2068 m.group('value'), filter_spec))
2069 op = OPERATORS[m.group('op')]
2070
2071 if not m:
2072 STR_OPERATORS = {
2073 '=': operator.eq,
2074 '^=': lambda attr, value: attr.startswith(value),
2075 '$=': lambda attr, value: attr.endswith(value),
2076 '*=': lambda attr, value: value in attr,
2077 '~=': lambda attr, value: value.search(attr) is not None
2078 }
2079 str_operator_rex = re.compile(r'''(?x)\s*
2080 (?P<key>[a-zA-Z0-9._-]+)\s*
2081 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
2082 (?P<quote>["'])?
2083 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2084 (?(quote)(?P=quote))\s*
2085 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
2086 m = str_operator_rex.fullmatch(filter_spec)
2087 if m:
2088 if m.group('op') == '~=':
2089 comparison_value = re.compile(m.group('value'))
2090 else:
2091 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2092 str_op = STR_OPERATORS[m.group('op')]
2093 if m.group('negation'):
2094 op = lambda attr, value: not str_op(attr, value)
2095 else:
2096 op = str_op
2097
2098 if not m:
2099 raise SyntaxError('Invalid filter specification %r' % filter_spec)
2100
2101 def _filter(f):
2102 actual_value = f.get(m.group('key'))
2103 if actual_value is None:
2104 return m.group('none_inclusive')
2105 return op(actual_value, comparison_value)
2106 return _filter
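# Example behaviour (values assumed):
#
#   f = ydl._build_format_filter('height<=720')
#   f({'height': 480})   # -> True
#   f({'height': 1080})  # -> False
#   f({})                # -> None; write 'height<=?720' to also keep
#                        #    formats that do not report a height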
2107
2108 def _check_formats(self, formats):
2109 for f in formats:
2110 self.to_screen('[info] Testing format %s' % f['format_id'])
2111 path = self.get_output_path('temp')
2112 if not self._ensure_dir_exists(f'{path}/'):
2113 continue
2114 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2115 temp_file.close()
2116 try:
2117 success, _ = self.dl(temp_file.name, f, test=True)
2118 except (DownloadError, OSError, ValueError) + network_exceptions:
2119 success = False
2120 finally:
2121 if os.path.exists(temp_file.name):
2122 try:
2123 os.remove(temp_file.name)
2124 except OSError:
2125 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2126 if success:
2127 yield f
2128 else:
2129 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2130
2131 def _default_format_spec(self, info_dict, download=True):
2132
2133 def can_merge():
2134 merger = FFmpegMergerPP(self)
2135 return merger.available and merger.can_merge()
2136
2137 prefer_best = (
2138 not self.params.get('simulate')
2139 and download
2140 and (
2141 not can_merge()
2142 or info_dict.get('is_live') and not self.params.get('live_from_start')
2143 or self.params['outtmpl']['default'] == '-'))
2144 compat = (
2145 prefer_best
2146 or self.params.get('allow_multiple_audio_streams', False)
2147 or 'format-spec' in self.params['compat_opts'])
2148
2149 return (
2150 'best/bestvideo+bestaudio' if prefer_best
2151 else 'bestvideo*+bestaudio/best' if not compat
2152 else 'bestvideo+bestaudio/best')
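# e.g. with a working ffmpeg and default options this resolves to
# 'bestvideo*+bestaudio/best'; when merging is unavailable, the stream is
# live without --live-from-start, or output goes to stdout ('-'), it falls
# back to 'best/bestvideo+bestaudio'.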
2153
2154 def build_format_selector(self, format_spec):
2155 def syntax_error(note, start):
2156 message = (
2157 'Invalid format specification: '
2158 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2159 return SyntaxError(message)
2160
2161 PICKFIRST = 'PICKFIRST'
2162 MERGE = 'MERGE'
2163 SINGLE = 'SINGLE'
2164 GROUP = 'GROUP'
2165 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2166
2167 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2168 'video': self.params.get('allow_multiple_video_streams', False)}
2169
2170 def _parse_filter(tokens):
2171 filter_parts = []
2172 for type, string_, start, _, _ in tokens:
2173 if type == tokenize.OP and string_ == ']':
2174 return ''.join(filter_parts)
2175 else:
2176 filter_parts.append(string_)
2177
2178 def _remove_unused_ops(tokens):
2179 # Remove operators that we don't use and join them with the surrounding strings.
2180 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2181 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2182 last_string, last_start, last_end, last_line = None, None, None, None
2183 for type, string_, start, end, line in tokens:
2184 if type == tokenize.OP and string_ == '[':
2185 if last_string:
2186 yield tokenize.NAME, last_string, last_start, last_end, last_line
2187 last_string = None
2188 yield type, string_, start, end, line
2189 # everything inside brackets will be handled by _parse_filter
2190 for type, string_, start, end, line in tokens:
2191 yield type, string_, start, end, line
2192 if type == tokenize.OP and string_ == ']':
2193 break
2194 elif type == tokenize.OP and string_ in ALLOWED_OPS:
2195 if last_string:
2196 yield tokenize.NAME, last_string, last_start, last_end, last_line
2197 last_string = None
2198 yield type, string_, start, end, line
2199 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2200 if not last_string:
2201 last_string = string_
2202 last_start = start
2203 last_end = end
2204 else:
2205 last_string += string_
2206 if last_string:
2207 yield tokenize.NAME, last_string, last_start, last_end, last_line
2208
2209 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2210 selectors = []
2211 current_selector = None
2212 for type, string_, start, _, _ in tokens:
2213 # ENCODING is only defined in python 3.x
2214 if type == getattr(tokenize, 'ENCODING', None):
2215 continue
2216 elif type in [tokenize.NAME, tokenize.NUMBER]:
2217 current_selector = FormatSelector(SINGLE, string_, [])
2218 elif type == tokenize.OP:
2219 if string_ == ')':
2220 if not inside_group:
2221 # ')' will be handled by the parentheses group
2222 tokens.restore_last_token()
2223 break
2224 elif inside_merge and string_ in ['/', ',']:
2225 tokens.restore_last_token()
2226 break
2227 elif inside_choice and string_ == ',':
2228 tokens.restore_last_token()
2229 break
2230 elif string_ == ',':
2231 if not current_selector:
2232 raise syntax_error('"," must follow a format selector', start)
2233 selectors.append(current_selector)
2234 current_selector = None
2235 elif string_ == '/':
2236 if not current_selector:
2237 raise syntax_error('"/" must follow a format selector', start)
2238 first_choice = current_selector
2239 second_choice = _parse_format_selection(tokens, inside_choice=True)
2240 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2241 elif string_ == '[':
2242 if not current_selector:
2243 current_selector = FormatSelector(SINGLE, 'best', [])
2244 format_filter = _parse_filter(tokens)
2245 current_selector.filters.append(format_filter)
2246 elif string_ == '(':
2247 if current_selector:
2248 raise syntax_error('Unexpected "("', start)
2249 group = _parse_format_selection(tokens, inside_group=True)
2250 current_selector = FormatSelector(GROUP, group, [])
2251 elif string_ == '+':
2252 if not current_selector:
2253 raise syntax_error('Unexpected "+"', start)
2254 selector_1 = current_selector
2255 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2256 if not selector_2:
2257 raise syntax_error('Expected a selector', start)
2258 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2259 else:
2260 raise syntax_error(f'Operator not recognized: "{string_}"', start)
2261 elif type == tokenize.ENDMARKER:
2262 break
2263 if current_selector:
2264 selectors.append(current_selector)
2265 return selectors
2266
2267 def _merge(formats_pair):
2268 format_1, format_2 = formats_pair
2269
2270 formats_info = []
2271 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2272 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2273
2274 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2275 get_no_more = {'video': False, 'audio': False}
2276 for (i, fmt_info) in enumerate(formats_info):
2277 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2278 formats_info.pop(i)
2279 continue
2280 for aud_vid in ['audio', 'video']:
2281 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2282 if get_no_more[aud_vid]:
2283 formats_info.pop(i)
2284 break
2285 get_no_more[aud_vid] = True
2286
2287 if len(formats_info) == 1:
2288 return formats_info[0]
2289
2290 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2291 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2292
2293 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2294 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2295
2296 output_ext = get_compatible_ext(
2297 vcodecs=[f.get('vcodec') for f in video_fmts],
2298 acodecs=[f.get('acodec') for f in audio_fmts],
2299 vexts=[f['ext'] for f in video_fmts],
2300 aexts=[f['ext'] for f in audio_fmts],
2301 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2302 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2303
2304 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2305
2306 new_dict = {
2307 'requested_formats': formats_info,
2308 'format': '+'.join(filtered('format')),
2309 'format_id': '+'.join(filtered('format_id')),
2310 'ext': output_ext,
2311 'protocol': '+'.join(map(determine_protocol, formats_info)),
2312 'language': '+'.join(orderedSet(filtered('language'))) or None,
2313 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2314 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2315 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2316 }
2317
2318 if the_only_video:
2319 new_dict.update({
2320 'width': the_only_video.get('width'),
2321 'height': the_only_video.get('height'),
2322 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2323 'fps': the_only_video.get('fps'),
2324 'dynamic_range': the_only_video.get('dynamic_range'),
2325 'vcodec': the_only_video.get('vcodec'),
2326 'vbr': the_only_video.get('vbr'),
2327 'stretched_ratio': the_only_video.get('stretched_ratio'),
2328 'aspect_ratio': the_only_video.get('aspect_ratio'),
2329 })
2330
2331 if the_only_audio:
2332 new_dict.update({
2333 'acodec': the_only_audio.get('acodec'),
2334 'abr': the_only_audio.get('abr'),
2335 'asr': the_only_audio.get('asr'),
2336 'audio_channels': the_only_audio.get('audio_channels')
2337 })
2338
2339 return new_dict
2340
2341 def _check_formats(formats):
2342 if (self.params.get('check_formats') is not None
2343 or self.params.get('allow_unplayable_formats')):
2344 yield from formats
2345 return
2346 elif self.params.get('check_formats') == 'selected':
2347 yield from self._check_formats(formats)
2348 return
2349
2350 for f in formats:
2351 if f.get('has_drm'):
2352 yield from self._check_formats([f])
2353 else:
2354 yield f
2355
2356 def _build_selector_function(selector):
2357 if isinstance(selector, list): # ,
2358 fs = [_build_selector_function(s) for s in selector]
2359
2360 def selector_function(ctx):
2361 for f in fs:
2362 yield from f(ctx)
2363 return selector_function
2364
2365 elif selector.type == GROUP: # ()
2366 selector_function = _build_selector_function(selector.selector)
2367
2368 elif selector.type == PICKFIRST: # /
2369 fs = [_build_selector_function(s) for s in selector.selector]
2370
2371 def selector_function(ctx):
2372 for f in fs:
2373 picked_formats = list(f(ctx))
2374 if picked_formats:
2375 return picked_formats
2376 return []
2377
2378 elif selector.type == MERGE: # +
2379 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2380
2381 def selector_function(ctx):
2382 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2383 yield _merge(pair)
2384
2385 elif selector.type == SINGLE: # atom
2386 format_spec = selector.selector or 'best'
2387
2388 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2389 if format_spec == 'all':
2390 def selector_function(ctx):
2391 yield from _check_formats(ctx['formats'][::-1])
2392 elif format_spec == 'mergeall':
2393 def selector_function(ctx):
2394 formats = list(_check_formats(
2395 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2396 if not formats:
2397 return
2398 merged_format = formats[-1]
2399 for f in formats[-2::-1]:
2400 merged_format = _merge((merged_format, f))
2401 yield merged_format
2402
2403 else:
2404 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2405 mobj = re.match(
2406 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2407 format_spec)
2408 if mobj is not None:
2409 format_idx = int_or_none(mobj.group('n'), default=1)
2410 format_reverse = mobj.group('bw')[0] == 'b'
2411 format_type = (mobj.group('type') or [None])[0]
2412 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2413 format_modified = mobj.group('mod') is not None
2414
2415 format_fallback = not format_type and not format_modified # for b, w
2416 _filter_f = (
2417 (lambda f: f.get('%scodec' % format_type) != 'none')
2418 if format_type and format_modified # bv*, ba*, wv*, wa*
2419 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2420 if format_type # bv, ba, wv, wa
2421 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2422 if not format_modified # b, w
2423 else lambda f: True) # b*, w*
2424 filter_f = lambda f: _filter_f(f) and (
2425 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2426 else:
2427 if format_spec in self._format_selection_exts['audio']:
2428 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2429 elif format_spec in self._format_selection_exts['video']:
2430 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2431 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2432 elif format_spec in self._format_selection_exts['storyboards']:
2433 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2434 else:
2435 filter_f = lambda f: f.get('format_id') == format_spec # id
2436
2437 def selector_function(ctx):
2438 formats = list(ctx['formats'])
2439 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2440 if not matches:
2441 if format_fallback and ctx['incomplete_formats']:
2442 # for extractors with incomplete formats (audio only (soundcloud)
2443 # or video only (imgur)) best/worst will fall back to
2444 # best/worst {video,audio}-only format
2445 matches = formats
2446 elif separate_fallback and not ctx['has_merged_format']:
2447 # for compatibility with youtube-dl when there is no pre-merged format
2448 matches = list(filter(separate_fallback, formats))
2449 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2450 try:
2451 yield matches[format_idx - 1]
2452 except LazyList.IndexError:
2453 return
2454
2455 filters = [self._build_format_filter(f) for f in selector.filters]
2456
2457 def final_selector(ctx):
2458 ctx_copy = dict(ctx)
2459 for _filter in filters:
2460 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2461 return selector_function(ctx_copy)
2462 return final_selector
2463
2464 stream = io.BytesIO(format_spec.encode())
2465 try:
2466 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2467 except tokenize.TokenError:
2468 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2469
2470 class TokenIterator:
2471 def __init__(self, tokens):
2472 self.tokens = tokens
2473 self.counter = 0
2474
2475 def __iter__(self):
2476 return self
2477
2478 def __next__(self):
2479 if self.counter >= len(self.tokens):
2480 raise StopIteration()
2481 value = self.tokens[self.counter]
2482 self.counter += 1
2483 return value
2484
2485 next = __next__
2486
2487 def restore_last_token(self):
2488 self.counter -= 1
2489
2490 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2491 return _build_selector_function(parsed_selector)
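# Hedged usage sketch; the ctx keys are inferred from the selector functions
# above, and 'info' stands for an already-extracted info dict:
#
#   selector = ydl.build_format_selector('bestvideo[height<=1080]+bestaudio/best')
#   ctx = {'formats': info['formats'],
#          'incomplete_formats': False,
#          'has_merged_format': any(f.get('vcodec') != 'none' and
#                                   f.get('acodec') != 'none'
#                                   for f in info['formats'])}
#   best = list(selector(ctx))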
2492
2493 def _calc_headers(self, info_dict, load_cookies=False):
2494 res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
2495 clean_headers(res)
2496
2497 if load_cookies: # For --load-info-json
2498 self._load_cookies(res.get('Cookie'), autoscope=info_dict['url']) # compat
2499 self._load_cookies(info_dict.get('cookies'), autoscope=False)
2500 # The `Cookie` header is removed to prevent leaks and unscoped cookies.
2501 # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
2502 res.pop('Cookie', None)
2503 cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
2504 if cookies:
2505 encoder = LenientSimpleCookie()
2506 values = []
2507 for cookie in cookies:
2508 _, value = encoder.value_encode(cookie.value)
2509 values.append(f'{cookie.name}={value}')
2510 if cookie.domain:
2511 values.append(f'Domain={cookie.domain}')
2512 if cookie.path:
2513 values.append(f'Path={cookie.path}')
2514 if cookie.secure:
2515 values.append('Secure')
2516 if cookie.expires:
2517 values.append(f'Expires={cookie.expires}')
2518 if cookie.version:
2519 values.append(f'Version={cookie.version}')
2520 info_dict['cookies'] = '; '.join(values)
2521
2522 if 'X-Forwarded-For' not in res:
2523 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2524 if x_forwarded_for_ip:
2525 res['X-Forwarded-For'] = x_forwarded_for_ip
2526
2527 return res
2528
2529 def _calc_cookies(self, url):
2530 self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2531 return self.cookiejar.get_cookie_header(url)
2532
2533 def _sort_thumbnails(self, thumbnails):
2534 thumbnails.sort(key=lambda t: (
2535 t.get('preference') if t.get('preference') is not None else -1,
2536 t.get('width') if t.get('width') is not None else -1,
2537 t.get('height') if t.get('height') is not None else -1,
2538 t.get('id') if t.get('id') is not None else '',
2539 t.get('url')))
2540
2541 def _sanitize_thumbnails(self, info_dict):
2542 thumbnails = info_dict.get('thumbnails')
2543 if thumbnails is None:
2544 thumbnail = info_dict.get('thumbnail')
2545 if thumbnail:
2546 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2547 if not thumbnails:
2548 return
2549
2550 def check_thumbnails(thumbnails):
2551 for t in thumbnails:
2552 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2553 try:
2554 self.urlopen(HEADRequest(t['url']))
2555 except network_exceptions as err:
2556 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2557 continue
2558 yield t
2559
2560 self._sort_thumbnails(thumbnails)
2561 for i, t in enumerate(thumbnails):
2562 if t.get('id') is None:
2563 t['id'] = '%d' % i
2564 if t.get('width') and t.get('height'):
2565 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2566 t['url'] = sanitize_url(t['url'])
2567
2568 if self.params.get('check_formats') is True:
2569 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2570 else:
2571 info_dict['thumbnails'] = thumbnails
2572
2573 def _fill_common_fields(self, info_dict, final=True):
2574 # TODO: move sanitization here
2575 if final:
2576 title = info_dict['fulltitle'] = info_dict.get('title')
2577 if not title:
2578 if title == '':
2579 self.write_debug('Extractor gave empty title. Creating a generic title')
2580 else:
2581 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2582 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2583
2584 if info_dict.get('duration') is not None:
2585 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2586
2587 for ts_key, date_key in (
2588 ('timestamp', 'upload_date'),
2589 ('release_timestamp', 'release_date'),
2590 ('modified_timestamp', 'modified_date'),
2591 ):
2592 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2593 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2594 # see http://bugs.python.org/issue1646728)
2595 with contextlib.suppress(ValueError, OverflowError, OSError):
2596 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2597 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2598
2599 live_keys = ('is_live', 'was_live')
2600 live_status = info_dict.get('live_status')
2601 if live_status is None:
2602 for key in live_keys:
2603 if info_dict.get(key) is False:
2604 continue
2605 if info_dict.get(key):
2606 live_status = key
2607 break
2608 if all(info_dict.get(key) is False for key in live_keys):
2609 live_status = 'not_live'
2610 if live_status:
2611 info_dict['live_status'] = live_status
2612 for key in live_keys:
2613 if info_dict.get(key) is None:
2614 info_dict[key] = (live_status == key)
2615 if live_status == 'post_live':
2616 info_dict['was_live'] = True
2617
2618 # Auto generate title fields corresponding to the *_number fields when missing
2619 # in order to always have clean titles. This is very common for TV series.
2620 for field in ('chapter', 'season', 'episode'):
2621 if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2622 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
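# e.g. an info dict carrying {'timestamp': 1577836800} gains
# {'upload_date': '20200101'}, and on the final pass a missing title becomes
# '<extractor> video #<id>'.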
2623
2624 def _raise_pending_errors(self, info):
2625 err = info.pop('__pending_error', None)
2626 if err:
2627 self.report_error(err, tb=False)
2628
2629 def sort_formats(self, info_dict):
2630 formats = self._get_formats(info_dict)
2631 formats.sort(key=FormatSorter(
2632 self, info_dict.get('_format_sort_fields') or []).calculate_preference)
2633
2634 def process_video_result(self, info_dict, download=True):
2635 assert info_dict.get('_type', 'video') == 'video'
2636 self._num_videos += 1
2637
2638 if 'id' not in info_dict:
2639 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2640 elif not info_dict.get('id'):
2641 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2642
2643 def report_force_conversion(field, field_not, conversion):
2644 self.report_warning(
2645 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2646 % (field, field_not, conversion))
2647
2648 def sanitize_string_field(info, string_field):
2649 field = info.get(string_field)
2650 if field is None or isinstance(field, str):
2651 return
2652 report_force_conversion(string_field, 'a string', 'string')
2653 info[string_field] = str(field)
2654
2655 def sanitize_numeric_fields(info):
2656 for numeric_field in self._NUMERIC_FIELDS:
2657 field = info.get(numeric_field)
2658 if field is None or isinstance(field, (int, float)):
2659 continue
2660 report_force_conversion(numeric_field, 'numeric', 'int')
2661 info[numeric_field] = int_or_none(field)
2662
2663 sanitize_string_field(info_dict, 'id')
2664 sanitize_numeric_fields(info_dict)
2665 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2666 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2667 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2668 self.report_warning('"duration" field is negative, there is an error in extractor')
2669
2670 chapters = info_dict.get('chapters') or []
2671 if chapters and chapters[0].get('start_time'):
2672 chapters.insert(0, {'start_time': 0})
2673
2674 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2675 for idx, (prev, current, next_) in enumerate(zip(
2676 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2677 if current.get('start_time') is None:
2678 current['start_time'] = prev.get('end_time')
2679 if not current.get('end_time'):
2680 current['end_time'] = next_.get('start_time')
2681 if not current.get('title'):
2682 current['title'] = f'<Untitled Chapter {idx}>'
2683
2684 if 'playlist' not in info_dict:
2685 # It isn't part of a playlist
2686 info_dict['playlist'] = None
2687 info_dict['playlist_index'] = None
2688
2689 self._sanitize_thumbnails(info_dict)
2690
2691 thumbnail = info_dict.get('thumbnail')
2692 thumbnails = info_dict.get('thumbnails')
2693 if thumbnail:
2694 info_dict['thumbnail'] = sanitize_url(thumbnail)
2695 elif thumbnails:
2696 info_dict['thumbnail'] = thumbnails[-1]['url']
2697
2698 if info_dict.get('display_id') is None and 'id' in info_dict:
2699 info_dict['display_id'] = info_dict['id']
2700
2701 self._fill_common_fields(info_dict)
2702
2703 for cc_kind in ('subtitles', 'automatic_captions'):
2704 cc = info_dict.get(cc_kind)
2705 if cc:
2706 for _, subtitle in cc.items():
2707 for subtitle_format in subtitle:
2708 if subtitle_format.get('url'):
2709 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2710 if subtitle_format.get('ext') is None:
2711 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2712
2713 automatic_captions = info_dict.get('automatic_captions')
2714 subtitles = info_dict.get('subtitles')
2715
2716 info_dict['requested_subtitles'] = self.process_subtitles(
2717 info_dict['id'], subtitles, automatic_captions)
2718
2719 formats = self._get_formats(info_dict)
2720
2721 # Backward compatibility with InfoExtractor._sort_formats
2722 field_preference = (formats or [{}])[0].pop('__sort_fields', None)
2723 if field_preference:
2724 info_dict['_format_sort_fields'] = field_preference
2725
2726 info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
2727 f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
2728 if not self.params.get('allow_unplayable_formats'):
2729 formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
2730
2731 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2732 self.report_warning(
2733 f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2734 'only images are available for download. Use --list-formats to see them'.capitalize())
2735
2736 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2737 if not get_from_start:
2738 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2739 if info_dict.get('is_live') and formats:
2740 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2741 if get_from_start and not formats:
2742 self.raise_no_formats(info_dict, msg=(
2743 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2744 'If you want to download from the current time, use --no-live-from-start'))
2745
2746 def is_wellformed(f):
2747 url = f.get('url')
2748 if not url:
2749 self.report_warning(
2750 '"url" field is missing or empty - skipping format, '
2751 'there is an error in extractor')
2752 return False
2753 if isinstance(url, bytes):
2754 sanitize_string_field(f, 'url')
2755 return True
2756
2757 # Filter out malformed formats for better extraction robustness
2758 formats = list(filter(is_wellformed, formats or []))
2759
2760 if not formats:
2761 self.raise_no_formats(info_dict)
2762
2763 for format in formats:
2764 sanitize_string_field(format, 'format_id')
2765 sanitize_numeric_fields(format)
2766 format['url'] = sanitize_url(format['url'])
2767 if format.get('ext') is None:
2768 format['ext'] = determine_ext(format['url']).lower()
2769 if format.get('protocol') is None:
2770 format['protocol'] = determine_protocol(format)
2771 if format.get('resolution') is None:
2772 format['resolution'] = self.format_resolution(format, default=None)
2773 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2774 format['dynamic_range'] = 'SDR'
2775 if format.get('aspect_ratio') is None:
2776 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
2777 if (not format.get('manifest_url') # For fragmented formats, "tbr" is often max bitrate and not average
2778 and info_dict.get('duration') and format.get('tbr')
2779 and not format.get('filesize') and not format.get('filesize_approx')):
2780 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
2781 format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)
2782
2783 # Safeguard against old/insecure infojson when using --load-info-json
2784 if info_dict.get('http_headers'):
2785 info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
2786 info_dict['http_headers'].pop('Cookie', None)
2787
2788 # This is copied to http_headers by the above _calc_headers and can now be removed
2789 if '__x_forwarded_for_ip' in info_dict:
2790 del info_dict['__x_forwarded_for_ip']
2791
2792 self.sort_formats({
2793 'formats': formats,
2794 '_format_sort_fields': info_dict.get('_format_sort_fields')
2795 })
2796
2797 # Sanitize and group by format_id
2798 formats_dict = {}
2799 for i, format in enumerate(formats):
2800 if not format.get('format_id'):
2801 format['format_id'] = str(i)
2802 else:
2803 # Replace characters used in format selector expressions with '_'
2804 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2805 formats_dict.setdefault(format['format_id'], []).append(format)
2806
2807 # Make sure all formats have unique format_id
2808 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2809 for format_id, ambiguous_formats in formats_dict.items():
2810 ambiguous_id = len(ambiguous_formats) > 1
2811 for i, format in enumerate(ambiguous_formats):
2812 if ambiguous_id:
2813 format['format_id'] = '%s-%d' % (format_id, i)
2814 # Ensure there is no conflict between id and ext in format selection
2815 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2816 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2817 format['format_id'] = 'f%s' % format['format_id']
2818
2819 if format.get('format') is None:
2820 format['format'] = '{id} - {res}{note}'.format(
2821 id=format['format_id'],
2822 res=self.format_resolution(format),
2823 note=format_field(format, 'format_note', ' (%s)'),
2824 )
2825
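# A small illustrative sketch (hypothetical format_ids) of the sanitization
# and de-duplication above: selector characters become '_', duplicates get
# a '-<index>' suffix, and an id that collides with a common extension
# (while differing from the format's own ext) is prefixed with 'f':
#
#   ['hls,720', 'hls,720', 'mp4']  --sanitize-->  ['hls_720', 'hls_720', 'mp4']
#                                  --dedupe---->  ['hls_720-0', 'hls_720-1', 'fmp4']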
2826 if self.params.get('check_formats') is True:
2827 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2828
2829 if not formats or formats[0] is not info_dict:
2830 # Only set the 'formats' field if the original info_dict lists formats;
2831 # otherwise we end up with a circular reference: the first (and only)
2832 # element of info_dict['formats'] would be info_dict itself,
2833 # which can't be serialized to JSON
2834 info_dict['formats'] = formats
2835
2836 info_dict, _ = self.pre_process(info_dict)
2837
2838 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2839 return info_dict
2840
2841 self.post_extract(info_dict)
2842 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2843
2844 # The pre-processors may have modified the formats
2845 formats = self._get_formats(info_dict)
2846
2847 list_only = self.params.get('simulate') == 'list_only'
2848 interactive_format_selection = not list_only and self.format_selector == '-'
2849 if self.params.get('list_thumbnails'):
2850 self.list_thumbnails(info_dict)
2851 if self.params.get('listsubtitles'):
2852 if 'automatic_captions' in info_dict:
2853 self.list_subtitles(
2854 info_dict['id'], automatic_captions, 'automatic captions')
2855 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2856 if self.params.get('listformats') or interactive_format_selection:
2857 self.list_formats(info_dict)
2858 if list_only:
2859 # Without this printing, -F --print-json will not work
2860 self.__forced_printings(info_dict)
2861 return info_dict
2862
2863 format_selector = self.format_selector
2864 while True:
2865 if interactive_format_selection:
2866 req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
2867 + '(Press ENTER for default, or Ctrl+C to quit)'
2868 + self._format_screen(': ', self.Styles.EMPHASIS))
2869 try:
2870 format_selector = self.build_format_selector(req_format) if req_format else None
2871 except SyntaxError as err:
2872 self.report_error(err, tb=False, is_error=False)
2873 continue
2874
2875 if format_selector is None:
2876 req_format = self._default_format_spec(info_dict, download=download)
2877 self.write_debug(f'Default format spec: {req_format}')
2878 format_selector = self.build_format_selector(req_format)
2879
2880 formats_to_download = list(format_selector({
2881 'formats': formats,
2882 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2883 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
2884 or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
2885 }))
2886 if interactive_format_selection and not formats_to_download:
2887 self.report_error('Requested format is not available', tb=False, is_error=False)
2888 continue
2889 break
2890
2891 if not formats_to_download:
2892 if not self.params.get('ignore_no_formats_error'):
2893 raise ExtractorError(
2894 'Requested format is not available. Use --list-formats for a list of available formats',
2895 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2896 self.report_warning('Requested format is not available')
2897 # Process what we can, even without any available formats.
2898 formats_to_download = [{}]
2899
2900 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
2901 best_format, downloaded_formats = formats_to_download[-1], []
2902 if download:
2903 if best_format and requested_ranges:
2904 def to_screen(*msg):
2905 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2906
2907 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2908 (f['format_id'] for f in formats_to_download))
2909 if requested_ranges != ({}, ):
2910 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2911 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
2912 max_downloads_reached = False
2913
2914 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
2915 new_info = self._copy_infodict(info_dict)
2916 new_info.update(fmt)
2917 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2918 end_time = offset + min(chapter.get('end_time', duration), duration)
2919 # duration may not be accurate. So allow deviations <1sec
2920 if end_time == float('inf') or end_time > offset + duration + 1:
2921 end_time = None
2922 if chapter or offset:
2923 new_info.update({
2924 'section_start': offset + chapter.get('start_time', 0),
2925 'section_end': end_time,
2926 'section_title': chapter.get('title'),
2927 'section_number': chapter.get('index'),
2928 })
2929 downloaded_formats.append(new_info)
2930 try:
2931 self.process_info(new_info)
2932 except MaxDownloadsReached:
2933 max_downloads_reached = True
2934 self._raise_pending_errors(new_info)
2935 # Remove copied info
2936 for key, val in tuple(new_info.items()):
2937 if info_dict.get(key) == val:
2938 new_info.pop(key)
2939 if max_downloads_reached:
2940 break
2941
2942 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2943 assert write_archive.issubset({True, False, 'ignore'})
2944 if True in write_archive and False not in write_archive:
2945 self.record_download_archive(info_dict)
2946
2947 info_dict['requested_downloads'] = downloaded_formats
2948 info_dict = self.run_all_pps('after_video', info_dict)
2949 if max_downloads_reached:
2950 raise MaxDownloadsReached()
2951
2952 # Update the info dict with the selected best-quality format (for backwards compatibility)
2953 info_dict.update(best_format)
2954 return info_dict
2955
2956 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2957 """Select the requested subtitles and their format"""
2958 available_subs, normal_sub_langs = {}, []
2959 if normal_subtitles and self.params.get('writesubtitles'):
2960 available_subs.update(normal_subtitles)
2961 normal_sub_langs = tuple(normal_subtitles.keys())
2962 if automatic_captions and self.params.get('writeautomaticsub'):
2963 for lang, cap_info in automatic_captions.items():
2964 if lang not in available_subs:
2965 available_subs[lang] = cap_info
2966
2967 if not available_subs or (
2968 not self.params.get('writesubtitles')
2969 and not self.params.get('writeautomaticsub')):
2970 return None
2971
2972 all_sub_langs = tuple(available_subs.keys())
2973 if self.params.get('allsubtitles', False):
2974 requested_langs = all_sub_langs
2975 elif self.params.get('subtitleslangs', False):
2976 try:
2977 requested_langs = orderedSet_from_options(
2978 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2979 except re.error as e:
2980 raise ValueError(f'Invalid regex for subtitleslangs: {e.pattern}')
2981 else:
2982 requested_langs = LazyList(itertools.chain(
2983 ['en'] if 'en' in normal_sub_langs else [],
2984 filter(lambda f: f.startswith('en'), normal_sub_langs),
2985 ['en'] if 'en' in all_sub_langs else [],
2986 filter(lambda f: f.startswith('en'), all_sub_langs),
2987 normal_sub_langs, all_sub_langs,
2988 ))[:1]
2989 if requested_langs:
2990 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
2991
2992 formats_query = self.params.get('subtitlesformat', 'best')
2993 formats_preference = formats_query.split('/') if formats_query else []
2994 subs = {}
2995 for lang in requested_langs:
2996 formats = available_subs.get(lang)
2997 if formats is None:
2998 self.report_warning(f'{lang} subtitles not available for {video_id}')
2999 continue
3000 for ext in formats_preference:
3001 if ext == 'best':
3002 f = formats[-1]
3003 break
3004 matches = list(filter(lambda f: f['ext'] == ext, formats))
3005 if matches:
3006 f = matches[-1]
3007 break
3008 else:
3009 f = formats[-1]
3010 self.report_warning(
3011 'No subtitle format found matching "%s" for language %s, '
3012 'using %s' % (formats_query, lang, f['ext']))
3013 subs[lang] = f
3014 return subs
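# A minimal usage sketch of the selection logic above (hypothetical values):
#
#   ydl = YoutubeDL({'writesubtitles': True, 'subtitleslangs': ['en.*'],
#                    'subtitlesformat': 'srt/best'})
#   subs = ydl.process_subtitles(
#       'abc123', {'en': [{'ext': 'vtt', 'url': 'https://...'},
#                         {'ext': 'srt', 'url': 'https://...'}]}, None)
#   # -> {'en': {'ext': 'srt', 'url': 'https://...'}}: 'srt' matches before
#   #    the 'best' fallback, which would have picked the last-listed format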
3015
3016 def _forceprint(self, key, info_dict):
3017 if info_dict is None:
3018 return
3019 info_copy = info_dict.copy()
3020 info_copy.setdefault('filename', self.prepare_filename(info_dict))
3021 if info_dict.get('requested_formats') is not None:
3022 # For RTMP URLs, also include the playpath
3023 info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
3024 elif info_dict.get('url'):
3025 info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
3026 info_copy['formats_table'] = self.render_formats_table(info_dict)
3027 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
3028 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
3029 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
3030
3031 def format_tmpl(tmpl):
3032 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
3033 if not mobj:
3034 return tmpl
3035
3036 fmt = '%({})s'
3037 if tmpl.startswith('{'):
3038 tmpl, fmt = f'.{tmpl}', '%({})j'
3039 if tmpl.endswith('='):
3040 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
3041 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
3042
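# How format_tmpl expands --print templates (examples following the regex
# and replacements above):
#   'title'      -> '%(title)s'
#   'title,id'   -> '%(title)s\n%(id)s'
#   'id='        -> 'id = %(id)#j'
#   '{id,title}' -> '%(.{id,title})j'
# Anything that does not fully match the pattern is passed through
# unchanged as a regular output template.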
3043 for tmpl in self.params['forceprint'].get(key, []):
3044 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
3045
3046 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
3047 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
3048 tmpl = format_tmpl(tmpl)
3049 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
3050 if self._ensure_dir_exists(filename):
3051 with open(filename, 'a', encoding='utf-8', newline='') as f:
3052 f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
3053
3054 return info_copy
3055
3056 def __forced_printings(self, info_dict, filename=None, incomplete=True):
3057 if (self.params.get('forcejson')
3058 or self.params['forceprint'].get('video')
3059 or self.params['print_to_file'].get('video')):
3060 self.post_extract(info_dict)
3061 if filename:
3062 info_dict['filename'] = filename
3063 info_copy = self._forceprint('video', info_dict)
3064
3065 def print_field(field, actual_field=None, optional=False):
3066 if actual_field is None:
3067 actual_field = field
3068 if self.params.get(f'force{field}') and (
3069 info_copy.get(field) is not None or (not optional and not incomplete)):
3070 self.to_stdout(info_copy[actual_field])
3071
3072 print_field('title')
3073 print_field('id')
3074 print_field('url', 'urls')
3075 print_field('thumbnail', optional=True)
3076 print_field('description', optional=True)
3077 print_field('filename')
3078 if self.params.get('forceduration') and info_copy.get('duration') is not None:
3079 self.to_stdout(formatSeconds(info_copy['duration']))
3080 print_field('format')
3081
3082 if self.params.get('forcejson'):
3083 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
3084
3085 def dl(self, name, info, subtitle=False, test=False):
3086 if not info.get('url'):
3087 self.raise_no_formats(info, True)
3088
3089 if test:
3090 verbose = self.params.get('verbose')
3091 params = {
3092 'test': True,
3093 'quiet': self.params.get('quiet') or not verbose,
3094 'verbose': verbose,
3095 'noprogress': not verbose,
3096 'nopart': True,
3097 'skip_unavailable_fragments': False,
3098 'keep_fragments': False,
3099 'overwrites': True,
3100 '_no_ytdl_file': True,
3101 }
3102 else:
3103 params = self.params
3104 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
3105 if not test:
3106 for ph in self._progress_hooks:
3107 fd.add_progress_hook(ph)
3108 urls = '", "'.join(
3109 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
3110 for f in info.get('requested_formats', []) or [info])
3111 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
3112
3113 # Note: Ideally, info should be deep-copied so that hooks cannot modify it,
3114 # but it may contain objects that are not deep-copyable
3115 new_info = self._copy_infodict(info)
3116 if new_info.get('http_headers') is None:
3117 new_info['http_headers'] = self._calc_headers(new_info)
3118 return fd.download(name, new_info, subtitle)
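# When test=True, the downloader runs with 'test': True, which makes the
# HTTP downloaders fetch only a small initial chunk (roughly 10 KiB).
# This is how _check_formats/--check-formats probes a format without
# downloading it fully, e.g. (hypothetical names):
#
#   success, _ = self.dl(temp_file.name, fmt, test=True)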
3119
3120 def existing_file(self, filepaths, *, default_overwrite=True):
3121 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
3122 if existing_files and not self.params.get('overwrites', default_overwrite):
3123 return existing_files[0]
3124
3125 for file in existing_files:
3126 self.report_file_delete(file)
3127 os.remove(file)
3128 return None
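# Behaviour sketch (hypothetical paths): with overwrites disabled, the
# first existing candidate is returned; with overwrites enabled, all
# existing candidates are deleted and None is returned so the download
# proceeds:
#
#   self.existing_file(['a.mkv', 'a.webm'], default_overwrite=False)
#   # -> 'a.mkv' if it exists, else 'a.webm' if it exists, else None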
3129
3130 def process_info(self, info_dict):
3131 """Process a single resolved IE result. (Modifies it in-place)"""
3132
3133 assert info_dict.get('_type', 'video') == 'video'
3134 original_infodict = info_dict
3135
3136 if 'format' not in info_dict and 'ext' in info_dict:
3137 info_dict['format'] = info_dict['ext']
3138
3139 if self._match_entry(info_dict) is not None:
3140 info_dict['__write_download_archive'] = 'ignore'
3141 return
3142
3143 # Does nothing under normal operation - for backward compatibility of process_info
3144 self.post_extract(info_dict)
3145
3146 def replace_info_dict(new_info):
3147 nonlocal info_dict
3148 if new_info == info_dict:
3149 return
3150 info_dict.clear()
3151 info_dict.update(new_info)
3152
3153 new_info, _ = self.pre_process(info_dict, 'video')
3154 replace_info_dict(new_info)
3155 self._num_downloads += 1
3156
3157 # info_dict['_filename'] needs to be set for backward compatibility
3158 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3159 temp_filename = self.prepare_filename(info_dict, 'temp')
3160 files_to_move = {}
3161
3162 # Forced printings
3163 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
3164
3165 def check_max_downloads():
3166 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3167 raise MaxDownloadsReached()
3168
3169 if self.params.get('simulate'):
3170 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3171 check_max_downloads()
3172 return
3173
3174 if full_filename is None:
3175 return
3176 if not self._ensure_dir_exists(encodeFilename(full_filename)):
3177 return
3178 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
3179 return
3180
3181 if self._write_description('video', info_dict,
3182 self.prepare_filename(info_dict, 'description')) is None:
3183 return
3184
3185 sub_files = self._write_subtitles(info_dict, temp_filename)
3186 if sub_files is None:
3187 return
3188 files_to_move.update(dict(sub_files))
3189
3190 thumb_files = self._write_thumbnails(
3191 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3192 if thumb_files is None:
3193 return
3194 files_to_move.update(dict(thumb_files))
3195
3196 infofn = self.prepare_filename(info_dict, 'infojson')
3197 _infojson_written = self._write_info_json('video', info_dict, infofn)
3198 if _infojson_written:
3199 info_dict['infojson_filename'] = infofn
3200 # For backward compatibility, even though it was a private field
3201 info_dict['__infojson_filename'] = infofn
3202 elif _infojson_written is None:
3203 return
3204
3205 # Note: Annotations are deprecated
3206 annofn = None
3207 if self.params.get('writeannotations', False):
3208 annofn = self.prepare_filename(info_dict, 'annotation')
3209 if annofn:
3210 if not self._ensure_dir_exists(encodeFilename(annofn)):
3211 return
3212 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
3213 self.to_screen('[info] Video annotations are already present')
3214 elif not info_dict.get('annotations'):
3215 self.report_warning('There are no annotations to write.')
3216 else:
3217 try:
3218 self.to_screen('[info] Writing video annotations to: ' + annofn)
3219 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
3220 annofile.write(info_dict['annotations'])
3221 except (KeyError, TypeError):
3222 self.report_warning('There are no annotations to write.')
3223 except OSError:
3224 self.report_error('Cannot write annotations file: ' + annofn)
3225 return
3226
3227 # Write internet shortcut files
3228 def _write_link_file(link_type):
3229 url = try_get(info_dict['webpage_url'], iri_to_uri)
3230 if not url:
3231 self.report_warning(
3232 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3233 return True
3234 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3235 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3236 return False
3237 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3238 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3239 return True
3240 try:
3241 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3242 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3243 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3244 template_vars = {'url': url}
3245 if link_type == 'desktop':
3246 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3247 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3248 except OSError:
3249 self.report_error(f'Cannot write internet shortcut {linkfn}')
3250 return False
3251 return True
3252
3253 write_links = {
3254 'url': self.params.get('writeurllink'),
3255 'webloc': self.params.get('writewebloclink'),
3256 'desktop': self.params.get('writedesktoplink'),
3257 }
3258 if self.params.get('writelink'):
3259 link_type = ('webloc' if sys.platform == 'darwin'
3260 else 'desktop' if sys.platform.startswith('linux')
3261 else 'url')
3262 write_links[link_type] = True
3263
3264 if any(should_write and not _write_link_file(link_type)
3265 for link_type, should_write in write_links.items()):
3266 return
3267
3268 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3269 replace_info_dict(new_info)
3270
3271 if self.params.get('skip_download'):
3272 info_dict['filepath'] = temp_filename
3273 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3274 info_dict['__files_to_move'] = files_to_move
3275 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3276 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3277 else:
3278 # Download
3279 info_dict.setdefault('__postprocessors', [])
3280 try:
3281
3282 def existing_video_file(*filepaths):
3283 ext = info_dict.get('ext')
3284 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3285 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3286 default_overwrite=False)
3287 if file:
3288 info_dict['ext'] = os.path.splitext(file)[1][1:]
3289 return file
3290
3291 fd, success = None, True
3292 if info_dict.get('protocol') or info_dict.get('url'):
3293 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3294 if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3295 info_dict.get('section_start') or info_dict.get('section_end')):
3296 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3297 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3298 self.report_error(f'{msg}. Aborting')
3299 return
3300
3301 if info_dict.get('requested_formats') is not None:
3302 old_ext = info_dict['ext']
3303 if self.params.get('merge_output_format') is None:
3304 if (info_dict['ext'] == 'webm'
3305 and info_dict.get('thumbnails')
3306 # check with type instead of pp_key, __name__, or isinstance
3307 # since we don't want any custom PPs to trigger this
3308 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
3309 info_dict['ext'] = 'mkv'
3310 self.report_warning(
3311 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3312 new_ext = info_dict['ext']
3313
3314 def correct_ext(filename, ext=new_ext):
3315 if filename == '-':
3316 return filename
3317 filename_real_ext = os.path.splitext(filename)[1][1:]
3318 filename_wo_ext = (
3319 os.path.splitext(filename)[0]
3320 if filename_real_ext in (old_ext, new_ext)
3321 else filename)
3322 return f'{filename_wo_ext}.{ext}'
3323
3324 # Ensure filename always has a correct extension for successful merge
3325 full_filename = correct_ext(full_filename)
3326 temp_filename = correct_ext(temp_filename)
3327 dl_filename = existing_video_file(full_filename, temp_filename)
3328
3329 info_dict['__real_download'] = False
3330 # NOTE: Copy so that original format dicts are not modified
3331 info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
3332
3333 merger = FFmpegMergerPP(self)
3334 downloaded = []
3335 if dl_filename is not None:
3336 self.report_file_already_downloaded(dl_filename)
3337 elif fd:
3338 for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
3339 f['filepath'] = fname = prepend_extension(
3340 correct_ext(temp_filename, info_dict['ext']),
3341 'f%s' % f['format_id'], info_dict['ext'])
3342 downloaded.append(fname)
3343 info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
3344 success, real_download = self.dl(temp_filename, info_dict)
3345 info_dict['__real_download'] = real_download
3346 else:
3347 if self.params.get('allow_unplayable_formats'):
3348 self.report_warning(
3349 'You have requested merging of multiple formats '
3350 'while also allowing unplayable formats to be downloaded. '
3351 'The formats won\'t be merged to prevent data corruption.')
3352 elif not merger.available:
3353 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3354 if not self.params.get('ignoreerrors'):
3355 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3356 return
3357 self.report_warning(f'{msg}. The formats won\'t be merged')
3358
3359 if temp_filename == '-':
3360 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3361 else 'but the formats are incompatible for simultaneous download' if merger.available
3362 else 'but ffmpeg is not installed')
3363 self.report_warning(
3364 f'You have requested downloading multiple formats to stdout {reason}. '
3365 'The formats will be streamed one after the other')
3366 fname = temp_filename
3367 for f in info_dict['requested_formats']:
3368 new_info = dict(info_dict)
3369 del new_info['requested_formats']
3370 new_info.update(f)
3371 if temp_filename != '-':
3372 fname = prepend_extension(
3373 correct_ext(temp_filename, new_info['ext']),
3374 'f%s' % f['format_id'], new_info['ext'])
3375 if not self._ensure_dir_exists(fname):
3376 return
3377 f['filepath'] = fname
3378 downloaded.append(fname)
3379 partial_success, real_download = self.dl(fname, new_info)
3380 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3381 success = success and partial_success
3382
3383 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3384 info_dict['__postprocessors'].append(merger)
3385 info_dict['__files_to_merge'] = downloaded
3386 # Even if nothing new was downloaded, the merge itself happens only now
3387 info_dict['__real_download'] = True
3388 else:
3389 for file in downloaded:
3390 files_to_move[file] = None
3391 else:
3392 # Just a single file
3393 dl_filename = existing_video_file(full_filename, temp_filename)
3394 if dl_filename is None or dl_filename == temp_filename:
3395 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3396 # So we should try to resume the download
3397 success, real_download = self.dl(temp_filename, info_dict)
3398 info_dict['__real_download'] = real_download
3399 else:
3400 self.report_file_already_downloaded(dl_filename)
3401
3402 dl_filename = dl_filename or temp_filename
3403 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3404
3405 except network_exceptions as err:
3406 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3407 return
3408 except OSError as err:
3409 raise UnavailableVideoError(err)
3410 except (ContentTooShortError, ) as err:
3411 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3412 return
3413
3414 self._raise_pending_errors(info_dict)
3415 if success and full_filename != '-':
3416
3417 def fixup():
3418 do_fixup = True
3419 fixup_policy = self.params.get('fixup')
3420 vid = info_dict['id']
3421
3422 if fixup_policy in ('ignore', 'never'):
3423 return
3424 elif fixup_policy == 'warn':
3425 do_fixup = 'warn'
3426 elif fixup_policy != 'force':
3427 assert fixup_policy in ('detect_or_warn', None)
3428 if not info_dict.get('__real_download'):
3429 do_fixup = False
3430
3431 def ffmpeg_fixup(cndn, msg, cls):
3432 if not (do_fixup and cndn):
3433 return
3434 elif do_fixup == 'warn':
3435 self.report_warning(f'{vid}: {msg}')
3436 return
3437 pp = cls(self)
3438 if pp.available:
3439 info_dict['__postprocessors'].append(pp)
3440 else:
3441 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3442
3443 stretched_ratio = info_dict.get('stretched_ratio')
3444 ffmpeg_fixup(stretched_ratio not in (1, None),
3445 f'Non-uniform pixel ratio {stretched_ratio}',
3446 FFmpegFixupStretchedPP)
3447
3448 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3449 downloader = downloader.FD_NAME if downloader else None
3450
3451 ext = info_dict.get('ext')
3452 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3453 isinstance(pp, FFmpegVideoConvertorPP)
3454 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3455 ) for pp in self._pps['post_process']) or fd == FFmpegFD
3456
3457 if not postprocessed_by_ffmpeg:
3458 ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
3459 'writing DASH m4a. Only some players support this container',
3460 FFmpegFixupM4aPP)
3461 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3462 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3463 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3464 FFmpegFixupM3u8PP)
3465 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
3466 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3467
3468 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3469 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3470
3471 fixup()
3472 try:
3473 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3474 except PostProcessingError as err:
3475 self.report_error('Postprocessing: %s' % str(err))
3476 return
3477 try:
3478 for ph in self._post_hooks:
3479 ph(info_dict['filepath'])
3480 except Exception as err:
3481 self.report_error('post hooks: %s' % str(err))
3482 return
3483 info_dict['__write_download_archive'] = True
3484
3485 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
3486 if self.params.get('force_write_download_archive'):
3487 info_dict['__write_download_archive'] = True
3488 check_max_downloads()
3489
3490 def __download_wrapper(self, func):
3491 @functools.wraps(func)
3492 def wrapper(*args, **kwargs):
3493 try:
3494 res = func(*args, **kwargs)
3495 except UnavailableVideoError as e:
3496 self.report_error(e)
3497 except DownloadCancelled as e:
3498 self.to_screen(f'[info] {e}')
3499 if not self.params.get('break_per_url'):
3500 raise
3501 self._num_downloads = 0
3502 else:
3503 if self.params.get('dump_single_json', False):
3504 self.post_extract(res)
3505 self.to_stdout(json.dumps(self.sanitize_info(res)))
3506 return wrapper
3507
3508 def download(self, url_list):
3509 """Download a given list of URLs."""
3510 url_list = variadic(url_list) # Passing a single URL is a common mistake
3511 outtmpl = self.params['outtmpl']['default']
3512 if (len(url_list) > 1
3513 and outtmpl != '-'
3514 and '%' not in outtmpl
3515 and self.params.get('max_downloads') != 1):
3516 raise SameFileError(outtmpl)
3517
3518 for url in url_list:
3519 self.__download_wrapper(self.extract_info)(
3520 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3521
3522 return self._download_retcode
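# A minimal usage sketch (hypothetical URL):
#
#   with YoutubeDL({'outtmpl': '%(title)s.%(ext)s'}) as ydl:
#       retcode = ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
#
# retcode is 0 on success; SameFileError is raised when multiple URLs
# would all be written to a single fixed (non-template) filename.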
3523
3524 def download_with_info_file(self, info_filename):
3525 with contextlib.closing(fileinput.FileInput(
3526 [info_filename], mode='r',
3527 openhook=fileinput.hook_encoded('utf-8'))) as f:
3528 # FileInput doesn't have a read method, so we can't call json.load
3529 infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
3530 for info in variadic(json.loads('\n'.join(f)))]
3531 for info in infos:
3532 try:
3533 self.__download_wrapper(self.process_ie_result)(info, download=True)
3534 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3535 if not isinstance(e, EntryNotInPlaylist):
3536 self.to_stderr('\r')
3537 webpage_url = info.get('webpage_url')
3538 if webpage_url is None:
3539 raise
3540 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3541 self.download([webpage_url])
3542 return self._download_retcode
3543
3544 @staticmethod
3545 def sanitize_info(info_dict, remove_private_keys=False):
3546 ''' Sanitize the infodict for conversion to JSON '''
3547 if info_dict is None:
3548 return info_dict
3549 info_dict.setdefault('epoch', int(time.time()))
3550 info_dict.setdefault('_type', 'video')
3551 info_dict.setdefault('_version', {
3552 'version': __version__,
3553 'current_git_head': current_git_head(),
3554 'release_git_head': RELEASE_GIT_HEAD,
3555 'repository': REPOSITORY,
3556 })
3557
3558 if remove_private_keys:
3559 reject = lambda k, v: v is None or k.startswith('__') or k in {
3560 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3561 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3562 'playlist_autonumber', '_format_sort_fields',
3563 }
3564 else:
3565 reject = lambda k, v: False
3566
3567 def filter_fn(obj):
3568 if isinstance(obj, dict):
3569 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3570 elif isinstance(obj, (list, tuple, set, LazyList)):
3571 return list(map(filter_fn, obj))
3572 elif obj is None or isinstance(obj, (str, int, float, bool)):
3573 return obj
3574 else:
3575 return repr(obj)
3576
3577 return filter_fn(info_dict)
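# Effect of remove_private_keys on a typical infodict (hypothetical data):
#
#   YoutubeDL.sanitize_info(
#       {'id': 'abc', 'fulltitle': None, '_filename': 'x.mp4',
#        '__real_download': True, 'formats': ({'url': 'https://...'},)},
#       remove_private_keys=True)
#   # -> {'epoch': ..., '_type': 'video', '_version': {...},
#   #     'id': 'abc', 'formats': [{'url': 'https://...'}]}
#
# None values, dunder keys and the listed private fields are dropped,
# tuples/sets/LazyLists become lists, and values that are not
# JSON-serializable are replaced by their repr().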
3578
3579 @staticmethod
3580 def filter_requested_info(info_dict, actually_filter=True):
3581 ''' Alias of sanitize_info for backward compatibility '''
3582 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3583
3584 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3585 for filename in set(filter(None, files_to_delete)):
3586 if msg:
3587 self.to_screen(msg % filename)
3588 try:
3589 os.remove(filename)
3590 except OSError:
3591 self.report_warning(f'Unable to delete file {filename}')
3592 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3593 del info['__files_to_move'][filename]
3594
3595 @staticmethod
3596 def post_extract(info_dict):
3597 def actual_post_extract(info_dict):
3598 if info_dict.get('_type') in ('playlist', 'multi_video'):
3599 for video_dict in info_dict.get('entries', {}):
3600 actual_post_extract(video_dict or {})
3601 return
3602
3603 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3604 info_dict.update(post_extractor())
3605
3606 actual_post_extract(info_dict or {})
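# Sketch of the deferred-extraction contract (hypothetical data): an IE
# may set info_dict['__post_extractor'] to a callable returning extra
# fields, which is popped and merged in here:
#
#   info = {'id': 'abc', '__post_extractor': lambda: {'comments': []}}
#   YoutubeDL.post_extract(info)
#   # -> info == {'id': 'abc', 'comments': []}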
3607
3608 def run_pp(self, pp, infodict):
3609 files_to_delete = []
3610 if '__files_to_move' not in infodict:
3611 infodict['__files_to_move'] = {}
3612 try:
3613 files_to_delete, infodict = pp.run(infodict)
3614 except PostProcessingError as e:
3615 # Must be True and not 'only_download'
3616 if self.params.get('ignoreerrors') is True:
3617 self.report_error(e)
3618 return infodict
3619 raise
3620
3621 if not files_to_delete:
3622 return infodict
3623 if self.params.get('keepvideo', False):
3624 for f in files_to_delete:
3625 infodict['__files_to_move'].setdefault(f, '')
3626 else:
3627 self._delete_downloaded_files(
3628 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3629 return infodict
3630
3631 def run_all_pps(self, key, info, *, additional_pps=None):
3632 if key != 'video':
3633 self._forceprint(key, info)
3634 for pp in (additional_pps or []) + self._pps[key]:
3635 info = self.run_pp(pp, info)
3636 return info
3637
3638 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3639 info = dict(ie_info)
3640 info['__files_to_move'] = files_to_move or {}
3641 try:
3642 info = self.run_all_pps(key, info)
3643 except PostProcessingError as err:
3644 msg = f'Preprocessing: {err}'
3645 info.setdefault('__pending_error', msg)
3646 self.report_error(msg, is_error=False)
3647 return info, info.pop('__files_to_move', None)
3648
3649 def post_process(self, filename, info, files_to_move=None):
3650 """Run all the postprocessors on the given file."""
3651 info['filepath'] = filename
3652 info['__files_to_move'] = files_to_move or {}
3653 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3654 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3655 del info['__files_to_move']
3656 return self.run_all_pps('after_move', info)
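# Pipeline sketch: pre_process runs the 'pre_process'/'after_filter'/
# 'before_dl' PPs before download; post_process then runs the
# 'post_process' PPs plus any per-download __postprocessors, moves the
# files into place, and finally runs the 'after_move' PPs:
#
#   info = self.post_process(dl_filename, info, files_to_move)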
3657
3658 def _make_archive_id(self, info_dict):
3659 video_id = info_dict.get('id')
3660 if not video_id:
3661 return
3662 # Future-proof against any change in case,
3663 # and for backwards compatibility with prior versions
3664 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3665 if extractor is None:
3666 url = str_or_none(info_dict.get('url'))
3667 if not url:
3668 return
3669 # Try to find matching extractor for the URL and take its ie_key
3670 for ie_key, ie in self._ies.items():
3671 if ie.suitable(url):
3672 extractor = ie_key
3673 break
3674 else:
3675 return
3676 return make_archive_id(extractor, video_id)
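# Resulting archive id sketch: make_archive_id lower-cases the extractor
# key, e.g. {'extractor_key': 'Youtube', 'id': 'BaW_jenozKc'}
# -> 'youtube BaW_jenozKc'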
3677
3678 def in_download_archive(self, info_dict):
3679 if not self.archive:
3680 return False
3681
3682 vid_ids = [self._make_archive_id(info_dict)]
3683 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3684 return any(id_ in self.archive for id_ in vid_ids)
3685
3686 def record_download_archive(self, info_dict):
3687 fn = self.params.get('download_archive')
3688 if fn is None:
3689 return
3690 vid_id = self._make_archive_id(info_dict)
3691 assert vid_id
3692
3693 self.write_debug(f'Adding to archive: {vid_id}')
3694 if is_path_like(fn):
3695 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3696 archive_file.write(vid_id + '\n')
3697 self.archive.add(vid_id)
3698
3699 @staticmethod
3700 def format_resolution(format, default='unknown'):
3701 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3702 return 'audio only'
3703 if format.get('resolution') is not None:
3704 return format['resolution']
3705 if format.get('width') and format.get('height'):
3706 return '%dx%d' % (format['width'], format['height'])
3707 elif format.get('height'):
3708 return '%sp' % format['height']
3709 elif format.get('width'):
3710 return '%dx?' % format['width']
3711 return default
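# Examples of the fallback chain above:
#   {'vcodec': 'none', 'acodec': 'mp4a.40.2'} -> 'audio only'
#   {'width': 1920, 'height': 1080}           -> '1920x1080'
#   {'height': 720}                           -> '720p'
#   {'width': 640}                            -> '640x?'
#   {}                                        -> default ('unknown')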
3712
3713 def _list_format_headers(self, *headers):
3714 if self.params.get('listformats_table', True) is not False:
3715 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3716 return headers
3717
3718 def _format_note(self, fdict):
3719 res = ''
3720 if fdict.get('ext') in ['f4f', 'f4m']:
3721 res += '(unsupported)'
3722 if fdict.get('language'):
3723 if res:
3724 res += ' '
3725 res += '[%s]' % fdict['language']
3726 if fdict.get('format_note') is not None:
3727 if res:
3728 res += ' '
3729 res += fdict['format_note']
3730 if fdict.get('tbr') is not None:
3731 if res:
3732 res += ', '
3733 res += '%4dk' % fdict['tbr']
3734 if fdict.get('container') is not None:
3735 if res:
3736 res += ', '
3737 res += '%s container' % fdict['container']
3738 if (fdict.get('vcodec') is not None
3739 and fdict.get('vcodec') != 'none'):
3740 if res:
3741 res += ', '
3742 res += fdict['vcodec']
3743 if fdict.get('vbr') is not None:
3744 res += '@'
3745 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3746 res += 'video@'
3747 if fdict.get('vbr') is not None:
3748 res += '%4dk' % fdict['vbr']
3749 if fdict.get('fps') is not None:
3750 if res:
3751 res += ', '
3752 res += '%sfps' % fdict['fps']
3753 if fdict.get('acodec') is not None:
3754 if res:
3755 res += ', '
3756 if fdict['acodec'] == 'none':
3757 res += 'video only'
3758 else:
3759 res += '%-5s' % fdict['acodec']
3760 elif fdict.get('abr') is not None:
3761 if res:
3762 res += ', '
3763 res += 'audio'
3764 if fdict.get('abr') is not None:
3765 res += '@%3dk' % fdict['abr']
3766 if fdict.get('asr') is not None:
3767 res += ' (%5dHz)' % fdict['asr']
3768 if fdict.get('filesize') is not None:
3769 if res:
3770 res += ', '
3771 res += format_bytes(fdict['filesize'])
3772 elif fdict.get('filesize_approx') is not None:
3773 if res:
3774 res += ', '
3775 res += '~' + format_bytes(fdict['filesize_approx'])
3776 return res
3777
3778 def _get_formats(self, info_dict):
3779 if info_dict.get('formats') is None:
3780 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3781 return [info_dict]
3782 return []
3783 return info_dict['formats']
3784
3785 def render_formats_table(self, info_dict):
3786 formats = self._get_formats(info_dict)
3787 if not formats:
3788 return
3789 if self.params.get('listformats_table', True) is False:
3790 table = [
3791 [
3792 format_field(f, 'format_id'),
3793 format_field(f, 'ext'),
3794 self.format_resolution(f),
3795 self._format_note(f)
3796 ] for f in formats if (f.get('preference') or 0) >= -1000]
3797 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3798
3799 def simplified_codec(f, field):
3800 assert field in ('acodec', 'vcodec')
3801 codec = f.get(field)
3802 if not codec:
3803 return 'unknown'
3804 elif codec != 'none':
3805 return '.'.join(codec.split('.')[:4])
3806
3807 if field == 'vcodec' and f.get('acodec') == 'none':
3808 return 'images'
3809 elif field == 'acodec' and f.get('vcodec') == 'none':
3810 return ''
3811 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3812 self.Styles.SUPPRESS)
3813
3814 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3815 table = [
3816 [
3817 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3818 format_field(f, 'ext'),
3819 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3820 format_field(f, 'fps', '\t%d', func=round),
3821 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3822 format_field(f, 'audio_channels', '\t%s'),
3823 delim, (
3824 format_field(f, 'filesize', ' \t%s', func=format_bytes)
3825 or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
3826 or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
3827 None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
3828 format_field(f, 'tbr', '\t%dk', func=round),
3829 shorten_protocol_name(f.get('protocol', '')),
3830 delim,
3831 simplified_codec(f, 'vcodec'),
3832 format_field(f, 'vbr', '\t%dk', func=round),
3833 simplified_codec(f, 'acodec'),
3834 format_field(f, 'abr', '\t%dk', func=round),
3835 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3836 join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
3837 self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
3838 (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
3839 else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
3840 format_field(f, 'format_note'),
3841 format_field(f, 'container', ignore=(None, f.get('ext'))),
3842 delim=', '), delim=' '),
3843 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3844 header_line = self._list_format_headers(
3845 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3846 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3847
3848 return render_table(
3849 header_line, table, hide_empty=True,
3850 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3851
3852 def render_thumbnails_table(self, info_dict):
3853 thumbnails = list(info_dict.get('thumbnails') or [])
3854 if not thumbnails:
3855 return None
3856 return render_table(
3857 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3858 [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3859
3860 def render_subtitles_table(self, video_id, subtitles):
3861 def _row(lang, formats):
3862 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3863 if len(set(names)) == 1:
3864 names = [] if names[0] == 'unknown' else names[:1]
3865 return [lang, ', '.join(names), ', '.join(exts)]
3866
3867 if not subtitles:
3868 return None
3869 return render_table(
3870 self._list_format_headers('Language', 'Name', 'Formats'),
3871 [_row(lang, formats) for lang, formats in subtitles.items()],
3872 hide_empty=True)
3873
3874 def __list_table(self, video_id, name, func, *args):
3875 table = func(*args)
3876 if not table:
3877 self.to_screen(f'{video_id} has no {name}')
3878 return
3879 self.to_screen(f'[info] Available {name} for {video_id}:')
3880 self.to_stdout(table)
3881
3882 def list_formats(self, info_dict):
3883 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3884
3885 def list_thumbnails(self, info_dict):
3886 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3887
3888 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3889 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3890
3891 def print_debug_header(self):
3892 if not self.params.get('verbose'):
3893 return
3894
3895 from . import _IN_CLI # Must be delayed import
3896
3897 # These imports can be slow. So import them only as needed
3898 from .extractor.extractors import _LAZY_LOADER
3899 from .extractor.extractors import (
3900 _PLUGIN_CLASSES as plugin_ies,
3901 _PLUGIN_OVERRIDES as plugin_ie_overrides
3902 )
3903
3904 def get_encoding(stream):
3905 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3906 additional_info = []
3907 if os.environ.get('TERM', '').lower() == 'dumb':
3908 additional_info.append('dumb')
3909 if not supports_terminal_sequences(stream):
3910 from .utils import WINDOWS_VT_MODE # Must be imported locally
3911 additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
3912 if additional_info:
3913 ret = f'{ret} ({",".join(additional_info)})'
3914 return ret
3915
3916 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
3917 locale.getpreferredencoding(),
3918 sys.getfilesystemencoding(),
3919 self.get_encoding(),
3920 ', '.join(
3921 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3922 if stream is not None and key != 'console')
3923 )
3924
3925 logger = self.params.get('logger')
3926 if logger:
3927 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3928 write_debug(encoding_str)
3929 else:
3930 write_string(f'[debug] {encoding_str}\n', encoding=None)
3931 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3932
3933 source = detect_variant()
3934 if VARIANT not in (None, 'pip'):
3935 source += '*'
3936 klass = type(self)
3937 write_debug(join_nonempty(
3938 f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
3939 f'{CHANNEL}@{__version__}',
3940 f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
3941 '' if source == 'unknown' else f'({source})',
3942 '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
3943 delim=' '))
3944
3945 if not _IN_CLI:
3946 write_debug(f'params: {self.params}')
3947
3948 if not _LAZY_LOADER:
3949 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3950 write_debug('Lazy loading extractors is forcibly disabled')
3951 else:
3952 write_debug('Lazy loading extractors is disabled')
3953 if self.params['compat_opts']:
3954 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
3955
3956 if current_git_head():
3957 write_debug(f'Git HEAD: {current_git_head()}')
3958 write_debug(system_identifier())
3959
3960 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3961 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3962 if ffmpeg_features:
3963 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3964
3965 exe_versions['rtmpdump'] = rtmpdump_version()
3966 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3967 exe_str = ', '.join(
3968 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3969 ) or 'none'
3970 write_debug('exe versions: %s' % exe_str)
3971
3972 from .compat.compat_utils import get_package_info
3973 from .dependencies import available_dependencies
3974
3975 write_debug('Optional libraries: %s' % (', '.join(sorted({
3976 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
3977 })) or 'none'))
3978
3979 write_debug(f'Proxy map: {self.proxies}')
3980 # write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers)}')
3981 for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
3982 display_list = ['%s%s' % (
3983 klass.__name__, '' if klass.__name__ == name else f' as {name}')
3984 for name, klass in plugins.items()]
3985 if plugin_type == 'Extractor':
3986 display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
3987 for parent, plugins in plugin_ie_overrides.items())
3988 if not display_list:
3989 continue
3990 write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
3991
3992 plugin_dirs = plugin_directories()
3993 if plugin_dirs:
3994 write_debug(f'Plugin directories: {plugin_dirs}')
3995
3996 # Not implemented
3997 if False and self.params.get('call_home'):
3998 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
3999 write_debug('Public IP address: %s' % ipaddr)
4000 latest_version = self.urlopen(
4001 'https://yt-dl.org/latest/version').read().decode()
4002 if version_tuple(latest_version) > version_tuple(__version__):
4003 self.report_warning(
4004 'You are using an outdated version (newest version: %s)! '
4005 'See https://yt-dl.org/update if you need help updating.' %
4006 latest_version)
4007
4008 @functools.cached_property
4009 def proxies(self):
4010 """Global proxy configuration"""
4011 opts_proxy = self.params.get('proxy')
4012 if opts_proxy is not None:
4013 if opts_proxy == '':
4014 opts_proxy = '__noproxy__'
4015 proxies = {'all': opts_proxy}
4016 else:
4017 proxies = urllib.request.getproxies()
4018 # compat: use the HTTP proxy for HTTPS too; set HTTPS_PROXY to __noproxy__ to revert
4019 if 'http' in proxies and 'https' not in proxies:
4020 proxies['https'] = proxies['http']
4021
4022 return proxies
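# Resulting mapping sketch (hypothetical values):
#   params['proxy'] == 'socks5://127.0.0.1:1080' -> {'all': 'socks5://127.0.0.1:1080'}
#   params['proxy'] == ''                        -> {'all': '__noproxy__'}
#   no 'proxy' param, HTTP_PROXY in the env      -> {'http': ..., 'https': ...}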
4023
4024 @functools.cached_property
4025 def cookiejar(self):
4026 """Global cookiejar instance"""
4027 return load_cookies(
4028 self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
4029
4030 @property
4031 def _opener(self):
4032 """
4033 Get a urllib OpenerDirector from the Urllib handler (deprecated).
4034 """
4035 self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
4036 handler = self._request_director.handlers['Urllib']
4037 return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
4038
4039 def urlopen(self, req):
4040 """ Start an HTTP download """
4041 if isinstance(req, str):
4042 req = Request(req)
4043 elif isinstance(req, urllib.request.Request):
4044 self.deprecation_warning(
4045 'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
4046 'Use yt_dlp.networking.common.Request instead.')
4047 req = urllib_req_to_req(req)
4048 assert isinstance(req, Request)
4049
4050 # compat: Assume user:pass url params are basic auth
4051 url, basic_auth_header = extract_basic_auth(req.url)
4052 if basic_auth_header:
4053 req.headers['Authorization'] = basic_auth_header
4054 req.url = sanitize_url(url)
4055
4056 clean_proxies(proxies=req.proxies, headers=req.headers)
4057 clean_headers(req.headers)
4058
4059 try:
4060 return self._request_director.send(req)
4061 except NoSupportingHandlers as e:
4062 for ue in e.unsupported_errors:
4063 if not (ue.handler and ue.msg):
4064 continue
4065 if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
4066 raise RequestError(
4067 'file:// URLs are disabled by default in yt-dlp for security reasons. '
4068 'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
4069 raise
4070 except SSLError as e:
4071 if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
4072 raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
4073 elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
4074 raise RequestError(
4075 'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
4076 'Try using --legacy-server-connect', cause=e) from e
4077 raise
4078 except HTTPError as e: # TODO: Remove in a future release
4079 raise _CompatHTTPError(e) from e
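# A minimal usage sketch (hypothetical URL): a plain string is wrapped in
# a Request, and credentials embedded in the URL are converted into a
# Basic auth header before dispatch:
#
#   res = ydl.urlopen('https://user:pass@example.com/feed')
#   data = res.read()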
4080
4081 def build_request_director(self, handlers):
4082 logger = _YDLLogger(self)
4083 headers = self.params.get('http_headers').copy()
4084 proxies = self.proxies.copy()
4085 clean_headers(headers)
4086 clean_proxies(proxies, headers)
4087
4088 director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
4089 for handler in handlers:
4090 director.add_handler(handler(
4091 logger=logger,
4092 headers=headers,
4093 cookiejar=self.cookiejar,
4094 proxies=proxies,
4095 prefer_system_certs='no-certifi' in self.params['compat_opts'],
4096 verify=not self.params.get('nocheckcertificate'),
4097 **traverse_obj(self.params, {
4098 'verbose': 'debug_printtraffic',
4099 'source_address': 'source_address',
4100 'timeout': 'socket_timeout',
4101 'legacy_ssl_support': 'legacyserverconnect',
4102 'enable_file_urls': 'enable_file_urls',
4103 'client_cert': {
4104 'client_certificate': 'client_certificate',
4105 'client_certificate_key': 'client_certificate_key',
4106 'client_certificate_password': 'client_certificate_password',
4107 },
4108 }),
4109 ))
4110 return director
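# Usage sketch: the instance-wide director is built from the registered
# handler classes, roughly
#
#   self._request_director = self.build_request_director(_REQUEST_HANDLERS.values())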
4111
4112 def encode(self, s):
4113 if isinstance(s, bytes):
4114 return s # Already encoded
4115
4116 try:
4117 return s.encode(self.get_encoding())
4118 except UnicodeEncodeError as err:
4119 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
4120 raise
4121
4122 def get_encoding(self):
4123 encoding = self.params.get('encoding')
4124 if encoding is None:
4125 encoding = preferredencoding()
4126 return encoding
4127
4128 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
4129 ''' Write infojson and return True = written, 'exists' = already exists, False = skipped, None = error '''
4130 if overwrite is None:
4131 overwrite = self.params.get('overwrites', True)
4132 if not self.params.get('writeinfojson'):
4133 return False
4134 elif not infofn:
4135 self.write_debug(f'Skipping writing {label} infojson')
4136 return False
4137 elif not self._ensure_dir_exists(infofn):
4138 return None
4139 elif not overwrite and os.path.exists(infofn):
4140 self.to_screen(f'[info] {label.title()} metadata is already present')
4141 return 'exists'
4142
4143 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
4144 try:
4145 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
4146 return True
4147 except OSError:
4148 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
4149 return None
4150
4151 def _write_description(self, label, ie_result, descfn):
4152 ''' Write description and return True = written, False = skipped, None = error '''
4153 if not self.params.get('writedescription'):
4154 return False
4155 elif not descfn:
4156 self.write_debug(f'Skipping writing {label} description')
4157 return False
4158 elif not self._ensure_dir_exists(descfn):
4159 return None
4160 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
4161 self.to_screen(f'[info] {label.title()} description is already present')
4162 elif ie_result.get('description') is None:
4163 self.to_screen(f'[info] There\'s no {label} description to write')
4164 return False
4165 else:
4166 try:
4167 self.to_screen(f'[info] Writing {label} description to: {descfn}')
4168 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
4169 descfile.write(ie_result['description'])
4170 except OSError:
4171 self.report_error(f'Cannot write {label} description file {descfn}')
4172 return None
4173 return True
4174
4175 def _write_subtitles(self, info_dict, filename):
4176 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
4177 ret = []
4178 subtitles = info_dict.get('requested_subtitles')
4179 if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
4180 # Subtitle download errors are already handled as warnings in the relevant IE;
4181 # that way, extraction silently continues when the IE does not support subtitles
4182 return ret
4183 elif not subtitles:
4184 self.to_screen('[info] There are no subtitles for the requested languages')
4185 return ret
4186 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
4187 if not sub_filename_base:
4188 self.to_screen('[info] Skipping writing video subtitles')
4189 return ret
4190
4191 for sub_lang, sub_info in subtitles.items():
4192 sub_format = sub_info['ext']
4193 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
4194 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
4195 existing_sub = self.existing_file((sub_filename_final, sub_filename))
4196 if existing_sub:
4197 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
4198 sub_info['filepath'] = existing_sub
4199 ret.append((existing_sub, sub_filename_final))
4200 continue
4201
4202 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
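            # Extractors may embed the subtitle body directly under 'data';
            # if present, write it as-is and skip the download path below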
            if sub_info.get('data') is not None:
                try:
                    # Use newline='' to prevent conversion of newline characters
                    # See https://github.com/ytdl-org/youtube-dl/issues/10268
                    with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                        subfile.write(sub_info['data'])
                    sub_info['filepath'] = sub_filename
                    ret.append((sub_filename, sub_filename_final))
                    continue
                except OSError:
                    self.report_error(f'Cannot write video subtitles file {sub_filename}')
                    return None

            try:
                sub_copy = sub_info.copy()
                sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
                self.dl(sub_filename, sub_copy, subtitle=True)
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except (DownloadError, ExtractorError, OSError, ValueError) + network_exceptions as err:
                msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
                if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                    if not self.params.get('ignoreerrors'):
                        self.report_error(msg)
                    raise DownloadError(msg)
                self.report_warning(msg)
        return ret

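    # The returned (written, final) filename pairs feed the rename/move step;
    # roughly, the caller does (illustrative sketch of the handling):
    #
    #   sub_files = self._write_subtitles(info_dict, temp_filename)
    #   if sub_files is None:   # hard error while writing
    #       return
    #   files_to_move.update(dict(sub_files))
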
    def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
        ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails, ret = [], []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
            if not thumbnails:
                self.to_screen(f'[info] There are no {label} thumbnails to download')
                return ret
        multiple = write_all and len(thumbnails) > 1

        if thumb_filename_base is None:
            thumb_filename_base = filename
        if thumbnails and not thumb_filename_base:
            self.write_debug(f'Skipping writing {label} thumbnail')
            return ret

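        # The thumbnail list is sorted worst-to-best, so iterating in reverse
        # tries the most preferred candidate first; it also keeps the pop(idx)
        # below safe, since removing an entry never shifts the indices of
        # entries that are still to be visited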
        for idx, t in list(enumerate(thumbnails))[::-1]:
            thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
            thumb_display_id = f'{label} thumbnail {t["id"]}'
            thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
            thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

            existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
            if existing_thumb:
                self.to_screen('[info] %s is already present' % (
                    thumb_display_id if multiple else f'{label} thumbnail').capitalize())
                t['filepath'] = existing_thumb
                ret.append((existing_thumb, thumb_filename_final))
            else:
                self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                try:
                    uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append((thumb_filename, thumb_filename_final))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    if isinstance(err, HTTPError) and err.status == 404:
                        self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
                    else:
                        self.report_warning(f'Unable to download {thumb_display_id}: {err}')
                    thumbnails.pop(idx)
            if ret and not write_all:
                break
        return ret
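
    # Parameter interplay, as a sketch (illustrative): 'writethumbnail' stops
    # after the first thumbnail that is written or already present (the break
    # above), while 'write_all_thumbnails' downloads every candidate and, once
    # more than one is written, prefixes each filename with the thumbnail id:
    #
    #   ydl = YoutubeDL({'write_all_thumbnails': True})
    #   pairs = ydl._write_thumbnails('video', info_dict, filename)  # hypothetical call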