import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import functools
import http.cookiejar
import io
import itertools
import json
import locale
import operator
import os
import random
import re
import shutil
import string
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata

from .cache import Cache
from .compat import urllib  # isort: split
from .compat import compat_os_name, compat_shlex_quote
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, current_git_head, detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    IDENTITY,
    LINK_TEMPLATES,
    MEDIA_EXTENSIONS,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    FormatSorter,
    GeoRestrictedError,
    HEADRequest,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PerRequestProxyHandler,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    UserNotLive,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    age_restricted,
    args_to_str,
    bug_reports_message,
    date_from_str,
    deprecation_warning,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    escapeHTML,
    expand_path,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_compatible_ext,
    get_domain,
    int_or_none,
    iri_to_uri,
    is_path_like,
    join_nonempty,
    locked_file,
    make_archive_id,
    make_dir,
    make_HTTPS_handler,
    merge_headers,
    network_exceptions,
    number_of_digits,
    orderedSet,
    orderedSet_from_options,
    parse_filesize,
    preferredencoding,
    prepend_extension,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    system_identifier,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_call,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .utils.networking import clean_headers
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are responsible for downloading the actual video
    file and writing it to disk if the user has requested it, among
    other tasks. In most cases there should be one per program. Since,
    given a video URL, the downloader doesn't know how to extract all
    the needed information (a task InfoExtractors do), it has to pass
    the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor
    that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to,
    and YoutubeDL processes the extracted information, possibly using a
    File Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to
    saturate the object constructor with arguments, it receives a
    dictionary of options instead. These options are available through
    the params attribute for the InfoExtractors to use. The YoutubeDL
    object also registers itself as the downloader in charge for the
    InfoExtractors that are added to it, so this is a "mutual
    registration".

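    A minimal usage sketch (the URL and the option values are
    illustrative; download() is the high-level entry point of this
    class):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'format': 'best', 'outtmpl': '%(title)s.%(ext)s'}) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
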
    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
    netrc_cmd:         Use a shell command to get credentials
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of the same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
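                       E.g. (illustrative): {'default': '%(title)s [%(id)s].%(ext)s',
                       'infojson': '%(title)s.%(ext)s'}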
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be Windows-compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Print everything to stderr instead of stdout.
    consoletitle:      Display progress in the console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove internal metadata from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc.
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A utils.DateRange object; download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  A set, or the name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name or text stream that cookies should be read from and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input URL is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
                       * False: Always process. Default for API
                       * True: Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                         from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                         playlists (not multi_video). Default for CLI
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                         the entries of utils.POSTPROCESS_WHEN
                         Assumed to be 'post_process' if not given
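                       E.g. (illustrative; arguments depend on the PP):
                       [{'key': 'EmbedThumbnail', 'already_have_thumbnail': True}]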
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                         Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading" or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                         None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                         unknown
                       * fragment_index: The counter of the currently
                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
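                       E.g. (illustrative): lambda d: print(d['status'], d.get('filename'))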
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                         Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       - Raise utils.DownloadCancelled(msg) to abort remaining
                         downloads when a video is rejected.
                       match_filter_func in utils.py is one example for this.
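                       E.g. (illustrative): lambda info, *, incomplete: (
                       'Too short' if (info.get('duration') or 0) < 60 else None)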
    color:             A dictionary with output stream names as keys
                       and their respective color policy as values.
                       Can also just be a single color policy,
                       in which case it applies to all outputs.
                       Valid stream names are 'stdout' and 'stderr'.
                       Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
    geo_bypass:        Bypass geographic restriction by faking the
                       X-Forwarded-For HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be
                       used for explicit geographic restriction bypassing by
                       faking the X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
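                       E.g. (illustrative): lambda info, ydl: [{'start_time': 0,
                       'end_time': 30, 'title': 'intro', 'index': 1}]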
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress:        Do not print the progress bar
    live_from_start:   Whether to download livestreams from the start

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors (default: 3)
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists into different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
                       - `raise DownloadCancelled(msg)` in match_filter instead
    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor instead
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by the extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by the extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    no_color:          Same as `color='no_color'`
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }

    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )

        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.report_warning('Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'

        def process_color_policy(stream):
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
            if policy in ('auto', None):
                return term_allow_color and supports_terminal_sequences(stream)
            assert policy in ('always', 'never', 'no_color')
            return {'always': True, 'never': False}.get(policy, policy)

        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'
        })

        # The code is left like this to be reused for future deprecations
        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = ('Support for Python version %d.%d has been deprecated. '
                   'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
                   '\n You will no longer receive updates on this version')
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            self.deprecated_feature(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        self.__header_cookies = []
        self._load_cookies(traverse_obj(self.params.get('http_headers'), 'cookie', casesense=False))  # compat

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        self._setup_opener()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies_instances list; if there is no instance, it will create a new
        one and add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractor_classes to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
        try:
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
                }, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def save_cookies(self):
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def __exit__(self, *args):
        self.restore_console_title()
        self.save_cookies()

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to
        ignore download errors or not, this method may throw an
        exception when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        BAD_FORMAT='light red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Does the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$';
        # that is not what we want, since we need to keep '%%' intact for
        # the template dict substitution step. Work around it with a
        # boundary-like separator hack.
        sep = ''.join(random.choices(string.ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int, slice or "{field, ...}"
        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
            'inner': FIELD_INNER_RE,
            'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
        }
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')
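
        # A sketch of the template mini-language the regex above accepts
        # (examples adapted from the README; field values are illustrative):
        #   %(title)s                              plain field
        #   %(tags.0)s                             object traversal: key1.key2...
        #   %(duration>%H-%M-%S)s                  strftime-style formatting after '>'
        #   %(epoch-3600)s                         maths using the operators in MATH_FUNCTIONS
        #   %(release_date>%Y,upload_date>%Y)s     first non-None alternate after ','
        #   %(chapters&has chapters|no chapters)s  '&' replacement if set, '|' default if not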
1197
1198 def _traverse_infodict(fields):
1199 fields = [f for x in re.split(r'\.({.+?})\.?', fields)
1200 for f in ([x] if x.startswith('{') else x.split('.'))]
1201 for i in (0, -1):
1202 if fields and not fields[i]:
1203 fields.pop(i)
1204
1205 for i, f in enumerate(fields):
1206 if not f.startswith('{'):
1207 continue
1208 assert f.endswith('}'), f'No closing brace for {f} in {fields}'
1209 fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
1210
1211 return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
1212
1213 def get_value(mdict):
1214 # Object traversal
1215 value = _traverse_infodict(mdict['fields'])
1216 # Negative
1217 if mdict['negate']:
1218 value = float_or_none(value)
1219 if value is not None:
1220 value *= -1
1221 # Do maths
1222 offset_key = mdict['maths']
1223 if offset_key:
1224 value = float_or_none(value)
1225 operator = None
1226 while offset_key:
1227 item = re.match(
1228 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1229 offset_key).group(0)
1230 offset_key = offset_key[len(item):]
1231 if operator is None:
1232 operator = MATH_FUNCTIONS[item]
1233 continue
1234 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1235 offset = float_or_none(item)
1236 if offset is None:
1237 offset = float_or_none(_traverse_infodict(item))
1238 try:
1239 value = operator(value, multiplier * offset)
1240 except (TypeError, ZeroDivisionError):
1241 return None
1242 operator = None
1243 # Datetime formatting
1244 if mdict['strf_format']:
1245 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1246
1247 # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
1248 if sanitize and value == '':
1249 value = None
1250 return value
1251
1252 na = self.params.get('outtmpl_na_placeholder', 'NA')
1253
1254 def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1255 return sanitize_filename(str(value), restricted=restricted, is_id=(
1256 bool(re.search(r'(^|[_.])id(\.|$)', key))
1257 if 'filename-sanitization' in self.params['compat_opts']
1258 else NO_DEFAULT))
1259
1260 sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1261 sanitize = bool(sanitize)
1262
1263 def _dumpjson_default(obj):
1264 if isinstance(obj, (set, LazyList)):
1265 return list(obj)
1266 return repr(obj)
1267
1268 class _ReplacementFormatter(string.Formatter):
1269 def get_field(self, field_name, args, kwargs):
1270 if field_name.isdigit():
1271 return args[0], -1
1272 raise ValueError('Unsupported field')
1273
1274 replacement_formatter = _ReplacementFormatter()
1275
1276 def create_key(outer_mobj):
1277 if not outer_mobj.group('has_key'):
1278 return outer_mobj.group(0)
1279 key = outer_mobj.group('key')
1280 mobj = re.match(INTERNAL_FORMAT_RE, key)
1281 value, replacement, default, last_field = None, None, na, ''
1282 while mobj:
1283 mobj = mobj.groupdict()
1284 default = mobj['default'] if mobj['default'] is not None else default
1285 value = get_value(mobj)
1286 last_field, replacement = mobj['fields'], mobj['replacement']
1287 if value is None and mobj['alternate']:
1288 mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
1289 else:
1290 break
1291
1292 fmt = outer_mobj.group('format')
1293 if fmt == 's' and value is not None and last_field in field_size_compat_map.keys():
1294 fmt = f'0{field_size_compat_map[last_field]:d}d'
1295
1296 if None not in (value, replacement):
1297 try:
1298 value = replacement_formatter.format(replacement, value)
1299 except ValueError:
1300 value, default = None, na
1301
1302 flags = outer_mobj.group('conversion') or ''
1303 str_fmt = f'{fmt[:-1]}s'
1304 if value is None:
1305 value, fmt = default, 's'
1306 elif fmt[-1] == 'l': # list
1307 delim = '\n' if '#' in flags else ', '
1308 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1309 elif fmt[-1] == 'j': # json
1310 value, fmt = json.dumps(
1311 value, default=_dumpjson_default,
1312 indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
1313 elif fmt[-1] == 'h': # html
1314 value, fmt = escapeHTML(str(value)), str_fmt
1315 elif fmt[-1] == 'q': # quoted
1316 value = map(str, variadic(value) if '#' in flags else [value])
1317 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1318 elif fmt[-1] == 'B': # bytes
1319 value = f'%{str_fmt}'.encode() % str(value).encode()
1320 value, fmt = value.decode('utf-8', 'ignore'), 's'
1321 elif fmt[-1] == 'U': # unicode normalized
1322 value, fmt = unicodedata.normalize(
1323 # "+" = compatibility equivalence, "#" = NFD
1324 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1325 value), str_fmt
1326 elif fmt[-1] == 'D': # decimal suffix
1327 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1328 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1329 factor=1024 if '#' in flags else 1000)
1330 elif fmt[-1] == 'S': # filename sanitization
1331 value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
1332 elif fmt[-1] == 'c':
1333 if value:
1334 value = str(value)[0]
1335 else:
1336 fmt = str_fmt
1337 elif fmt[-1] not in 'rsa': # numeric
1338 value = float_or_none(value)
1339 if value is None:
1340 value, fmt = default, 's'
1341
1342 if sanitize:
1343 # If value is an object, sanitize might convert it to a string
1344 # So we convert it to repr first
1345 if fmt[-1] == 'r':
1346 value, fmt = repr(value), str_fmt
1347 elif fmt[-1] == 'a':
1348 value, fmt = ascii(value), str_fmt
1349 if fmt[-1] in 'csra':
1350 value = sanitizer(last_field, value)
1351
1352 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1353 TMPL_DICT[key] = value
1354 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1355
1356 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1357
1358 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1359 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1360 return self.escape_outtmpl(outtmpl) % info_dict
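# Illustrative example (assuming a configured YoutubeDL instance `ydl`):
#   ydl.evaluate_outtmpl('%(title)s [%(id)s].%(ext)s',
#                        {'title': 'Demo', 'id': 'x1', 'ext': 'mp4'})
# would produce 'Demo [x1].mp4'.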
1361
1362 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1363 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1364 if outtmpl is None:
1365 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1366 try:
1367 outtmpl = self._outtmpl_expandpath(outtmpl)
1368 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1369 if not filename:
1370 return None
1371
1372 if tmpl_type in ('', 'temp'):
1373 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1374 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1375 filename = replace_extension(filename, ext, final_ext)
1376 elif tmpl_type:
1377 force_ext = OUTTMPL_TYPES[tmpl_type]
1378 if force_ext:
1379 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1380
1381 # https://github.com/blackjack4494/youtube-dlc/issues/85
1382 trim_file_name = self.params.get('trim_file_name', False)
1383 if trim_file_name:
1384 no_ext, *ext = filename.rsplit('.', 2)
1385 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1386
1387 return filename
1388 except ValueError as err:
1389 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1390 return None
1391
1392 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1393 """Generate the output filename"""
1394 if outtmpl:
1395 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1396 dir_type = None
1397 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1398 if not filename and dir_type not in ('', 'temp'):
1399 return ''
1400
1401 if warn:
1402 if not self.params.get('paths'):
1403 pass
1404 elif filename == '-':
1405 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
1406 elif os.path.isabs(filename):
1407 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1408 if filename == '-' or not filename:
1409 return filename
1410
1411 return self.get_output_path(dir_type, filename)
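# Illustrative usage: prepare_filename(info) yields the final media filepath
# for the default template, while e.g. dir_type='thumbnail' (one of
# OUTTMPL_TYPES) applies that template type instead. Passing outtmpl=...
# evaluates an explicit template and is mutually exclusive with dir_type.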
1412
1413 def _match_entry(self, info_dict, incomplete=False, silent=False):
1414 """Returns None if the file should be downloaded"""
1415 _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
1416 assert incomplete or _type == 'video', 'Only video result can be considered complete'
1417
1418 video_title = info_dict.get('title', info_dict.get('id', 'entry'))
1419
1420 def check_filter():
1421 if _type in ('playlist', 'multi_video'):
1422 return
1423 elif _type in ('url', 'url_transparent') and not try_call(
1424 lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1425 return
1426
1427 if 'title' in info_dict:
1428 # This can happen when we're just evaluating the playlist
1429 title = info_dict['title']
1430 matchtitle = self.params.get('matchtitle', False)
1431 if matchtitle:
1432 if not re.search(matchtitle, title, re.IGNORECASE):
1433 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1434 rejecttitle = self.params.get('rejecttitle', False)
1435 if rejecttitle:
1436 if re.search(rejecttitle, title, re.IGNORECASE):
1437 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1438
1439 date = info_dict.get('upload_date')
1440 if date is not None:
1441 dateRange = self.params.get('daterange', DateRange())
1442 if date not in dateRange:
1443 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1444 view_count = info_dict.get('view_count')
1445 if view_count is not None:
1446 min_views = self.params.get('min_views')
1447 if min_views is not None and view_count < min_views:
1448 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1449 max_views = self.params.get('max_views')
1450 if max_views is not None and view_count > max_views:
1451 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1452 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1453 return 'Skipping "%s" because it is age restricted' % video_title
1454
1455 match_filter = self.params.get('match_filter')
1456 if match_filter is None:
1457 return None
1458
1459 cancelled = None
1460 try:
1461 try:
1462 ret = match_filter(info_dict, incomplete=incomplete)
1463 except TypeError:
1464 # For backward compatibility
1465 ret = None if incomplete else match_filter(info_dict)
1466 except DownloadCancelled as err:
1467 if err.msg is not NO_DEFAULT:
1468 raise
1469 ret, cancelled = err.msg, err
1470
1471 if ret is NO_DEFAULT:
1472 while True:
1473 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1474 reply = input(self._format_screen(
1475 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1476 if reply in {'y', ''}:
1477 return None
1478 elif reply == 'n':
1479 if cancelled:
1480 raise type(cancelled)(f'Skipping {video_title}')
1481 return f'Skipping {video_title}'
1482 return ret
1483
1484 if self.in_download_archive(info_dict):
1485 reason = '%s has already been recorded in the archive' % video_title
1486 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1487 else:
1488 try:
1489 reason = check_filter()
1490 except DownloadCancelled as e:
1491 reason, break_opt, break_err = e.msg, 'match_filter', type(e)
1492 else:
1493 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1494 if reason is not None:
1495 if not silent:
1496 self.to_screen('[download] ' + reason)
1497 if self.params.get(break_opt, False):
1498 raise break_err()
1499 return reason
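# Illustrative sketch of a custom match_filter (hypothetical helper): any
# callable accepted by params['match_filter'] may return None to allow the
# download, a string reason to skip, or NO_DEFAULT to prompt interactively:
#   def match_filter(info_dict, *, incomplete=False):
#       if (info_dict.get('duration') or 0) > 3600:
#           return 'Skipping videos longer than an hour'
#       return None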
1500
1501 @staticmethod
1502 def add_extra_info(info_dict, extra_info):
1503 '''Set the keys from extra_info in info dict if they are missing'''
1504 for key, value in extra_info.items():
1505 info_dict.setdefault(key, value)
1506
1507 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1508 process=True, force_generic_extractor=False):
1509 """
1510 Extract and return the information dictionary of the URL
1511
1512 Arguments:
1513 @param url URL to extract
1514
1515 Keyword arguments:
1516 @param download Whether to download videos
1517 @param process Whether to resolve all unresolved references (URLs, playlist items).
1518 Must be True for download to work
1519 @param ie_key Use only the extractor with this key
1520
1521 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1522 @param force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
1523 """
1524
1525 if extra_info is None:
1526 extra_info = {}
1527
1528 if not ie_key and force_generic_extractor:
1529 ie_key = 'Generic'
1530
1531 if ie_key:
1532 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1533 else:
1534 ies = self._ies
1535
1536 for key, ie in ies.items():
1537 if not ie.suitable(url):
1538 continue
1539
1540 if not ie.working():
1541 self.report_warning('The program functionality for this site has been marked as broken, '
1542 'and will probably not work.')
1543
1544 temp_id = ie.get_temp_id(url)
1545 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1546 self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
1547 if self.params.get('break_on_existing', False):
1548 raise ExistingVideoReached()
1549 break
1550 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1551 else:
1552 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1553 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1554 tb=False if extractors_restricted else None)
1555
1556 def _handle_extraction_exceptions(func):
1557 @functools.wraps(func)
1558 def wrapper(self, *args, **kwargs):
1559 while True:
1560 try:
1561 return func(self, *args, **kwargs)
1562 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1563 raise
1564 except ReExtractInfo as e:
1565 if e.expected:
1566 self.to_screen(f'{e}; Re-extracting data')
1567 else:
1568 self.to_stderr('\r')
1569 self.report_warning(f'{e}; Re-extracting data')
1570 continue
1571 except GeoRestrictedError as e:
1572 msg = e.msg
1573 if e.countries:
1574 msg += '\nThis video is available in %s.' % ', '.join(
1575 map(ISO3166Utils.short2full, e.countries))
1576 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1577 self.report_error(msg)
1578 except ExtractorError as e: # An error we somewhat expected
1579 self.report_error(str(e), e.format_traceback())
1580 except Exception as e:
1581 if self.params.get('ignoreerrors'):
1582 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1583 else:
1584 raise
1585 break
1586 return wrapper
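# Illustrative note: this decorator wraps extraction entry points, e.g.
#   @_handle_extraction_exceptions
#   def __extract_info(self, url, ie, download, extra_info, process): ...
# It loops on ReExtractInfo, reports expected extractor errors, and re-raises
# anything else unless ignoreerrors is set.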
1587
1588 def _wait_for_video(self, ie_result={}):
1589 if (not self.params.get('wait_for_video')
1590 or ie_result.get('_type', 'video') != 'video'
1591 or ie_result.get('formats') or ie_result.get('url')):
1592 return
1593
1594 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1595 last_msg = ''
1596
1597 def progress(msg):
1598 nonlocal last_msg
1599 full_msg = f'{msg}\n'
1600 if not self.params.get('noprogress'):
1601 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1602 elif last_msg:
1603 return
1604 self.to_screen(full_msg, skip_eol=True)
1605 last_msg = msg
1606
1607 min_wait, max_wait = self.params.get('wait_for_video')
1608 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1609 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1610 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1611 self.report_warning('Release time of video is not known')
1612 elif ie_result and (diff or 0) <= 0:
1613 self.report_warning('Video should already be available according to extracted info')
1614 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1615 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1616
1617 wait_till = time.time() + diff
1618 try:
1619 while True:
1620 diff = wait_till - time.time()
1621 if diff <= 0:
1622 progress('')
1623 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1624 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1625 time.sleep(1)
1626 except KeyboardInterrupt:
1627 progress('')
1628 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1629 except BaseException as e:
1630 if not isinstance(e, ReExtractInfo):
1631 self.to_screen('')
1632 raise
1633
1634 def _load_cookies(self, data, *, from_headers=True):
1635 """Loads cookies from a `Cookie` header
1636
1637 This tries to work around the security vulnerability of passing cookies to every domain.
1638 See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1639 The unscoped cookies are saved for later to be stored in the jar with a limited scope.
1640
1641 @param data The Cookie header as string to load the cookies from
1642 @param from_headers If `False`, allows Set-Cookie syntax in the cookie string (at least a domain will be required)
1643 """
1644 for cookie in LenientSimpleCookie(data).values():
1645 if from_headers and any(cookie.values()):
1646 raise ValueError('Invalid syntax in Cookie Header')
1647
1648 domain = cookie.get('domain') or ''
1649 expiry = cookie.get('expires')
1650 if expiry == '': # 0 is valid
1651 expiry = None
1652 prepared_cookie = http.cookiejar.Cookie(
1653 cookie.get('version') or 0, cookie.key, cookie.value, None, False,
1654 domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
1655 cookie.get('secure') or False, expiry, False, None, None, {})
1656
1657 if domain:
1658 self.cookiejar.set_cookie(prepared_cookie)
1659 elif from_headers:
1660 self.deprecated_feature(
1661 'Passing cookies as a header is a potential security risk; '
1662 'they will be scoped to the domain of the downloaded URLs. '
1663 'Please consider loading cookies from a file or browser instead.')
1664 self.__header_cookies.append(prepared_cookie)
1665 else:
1666 self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1667 tb=False, is_error=False)
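# Illustrative example: loading cookies from a raw header value (sketch):
#   ydl._load_cookies('SID=abc; HSID=def', from_headers=True)
# The unscoped cookies are kept aside and later bound to each download's
# domain by _apply_header_cookies() below.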
1668
1669 def _apply_header_cookies(self, url):
1670 """Applies stray header cookies to the provided url
1671
1672 This loads header cookies and scopes them to the domain provided in `url`.
1673 While this is not ideal, it helps reduce the risk of them being sent
1674 to an unintended destination while mostly maintaining compatibility.
1675 """
1676 parsed = urllib.parse.urlparse(url)
1677 if not parsed.hostname:
1678 return
1679
1680 for cookie in map(copy.copy, self.__header_cookies):
1681 cookie.domain = f'.{parsed.hostname}'
1682 self.cookiejar.set_cookie(cookie)
1683
1684 @_handle_extraction_exceptions
1685 def __extract_info(self, url, ie, download, extra_info, process):
1686 self._apply_header_cookies(url)
1687
1688 try:
1689 ie_result = ie.extract(url)
1690 except UserNotLive as e:
1691 if process:
1692 if self.params.get('wait_for_video'):
1693 self.report_warning(e)
1694 self._wait_for_video()
1695 raise
1696 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1697 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1698 return
1699 if isinstance(ie_result, list):
1700 # Backwards compatibility: old IE result format
1701 ie_result = {
1702 '_type': 'compat_list',
1703 'entries': ie_result,
1704 }
1705 if extra_info.get('original_url'):
1706 ie_result.setdefault('original_url', extra_info['original_url'])
1707 self.add_default_extra_info(ie_result, ie, url)
1708 if process:
1709 self._wait_for_video(ie_result)
1710 return self.process_ie_result(ie_result, download, extra_info)
1711 else:
1712 return ie_result
1713
1714 def add_default_extra_info(self, ie_result, ie, url):
1715 if url is not None:
1716 self.add_extra_info(ie_result, {
1717 'webpage_url': url,
1718 'original_url': url,
1719 })
1720 webpage_url = ie_result.get('webpage_url')
1721 if webpage_url:
1722 self.add_extra_info(ie_result, {
1723 'webpage_url_basename': url_basename(webpage_url),
1724 'webpage_url_domain': get_domain(webpage_url),
1725 })
1726 if ie is not None:
1727 self.add_extra_info(ie_result, {
1728 'extractor': ie.IE_NAME,
1729 'extractor_key': ie.ie_key(),
1730 })
1731
1732 def process_ie_result(self, ie_result, download=True, extra_info=None):
1733 """
1734 Take the result of the ie (may be modified) and resolve all unresolved
1735 references (URLs, playlist items).
1736
1737 It will also download the videos if 'download'.
1738 Returns the resolved ie_result.
1739 """
1740 if extra_info is None:
1741 extra_info = {}
1742 result_type = ie_result.get('_type', 'video')
1743
1744 if result_type in ('url', 'url_transparent'):
1745 ie_result['url'] = sanitize_url(
1746 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1747 if ie_result.get('original_url') and not extra_info.get('original_url'):
1748 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1749
1750 extract_flat = self.params.get('extract_flat', False)
1751 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1752 or extract_flat is True):
1753 info_copy = ie_result.copy()
1754 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1755 if ie and not ie_result.get('id'):
1756 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1757 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1758 self.add_extra_info(info_copy, extra_info)
1759 info_copy, _ = self.pre_process(info_copy)
1760 self._fill_common_fields(info_copy, False)
1761 self.__forced_printings(info_copy)
1762 self._raise_pending_errors(info_copy)
1763 if self.params.get('force_write_download_archive', False):
1764 self.record_download_archive(info_copy)
1765 return ie_result
1766
1767 if result_type == 'video':
1768 self.add_extra_info(ie_result, extra_info)
1769 ie_result = self.process_video_result(ie_result, download=download)
1770 self._raise_pending_errors(ie_result)
1771 additional_urls = (ie_result or {}).get('additional_urls')
1772 if additional_urls:
1773 # TODO: Improve MetadataParserPP to allow setting a list
1774 if isinstance(additional_urls, str):
1775 additional_urls = [additional_urls]
1776 self.to_screen(
1777 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1778 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1779 ie_result['additional_entries'] = [
1780 self.extract_info(
1781 url, download, extra_info=extra_info,
1782 force_generic_extractor=self.params.get('force_generic_extractor'))
1783 for url in additional_urls
1784 ]
1785 return ie_result
1786 elif result_type == 'url':
1787 # We have to add extra_info to the results because it may be
1788 # contained in a playlist
1789 return self.extract_info(
1790 ie_result['url'], download,
1791 ie_key=ie_result.get('ie_key'),
1792 extra_info=extra_info)
1793 elif result_type == 'url_transparent':
1794 # Use the information from the embedding page
1795 info = self.extract_info(
1796 ie_result['url'], ie_key=ie_result.get('ie_key'),
1797 extra_info=extra_info, download=False, process=False)
1798
1799 # extract_info may return None when ignoreerrors is enabled and
1800 # extraction failed with an error, don't crash and return early
1801 # in this case
1802 if not info:
1803 return info
1804
1805 exempted_fields = {'_type', 'url', 'ie_key'}
1806 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1807 # For video clips, the id etc of the clip extractor should be used
1808 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1809
1810 new_result = info.copy()
1811 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1812
1813 # Extracted info may not be a video result (i.e.
1814 # info.get('_type', 'video') != 'video') but rather a url or
1815 # url_transparent. In such cases outer metadata (from ie_result)
1816 # should be propagated to inner one (info). For this to happen
1817 # _type of info should be overridden with url_transparent. This
1818 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1819 if new_result.get('_type') == 'url':
1820 new_result['_type'] = 'url_transparent'
1821
1822 return self.process_ie_result(
1823 new_result, download=download, extra_info=extra_info)
1824 elif result_type in ('playlist', 'multi_video'):
1825 # Protect from infinite recursion due to recursively nested playlists
1826 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1827 webpage_url = ie_result.get('webpage_url') # Playlists may not have webpage_url
1828 if webpage_url and webpage_url in self._playlist_urls:
1829 self.to_screen(
1830 '[download] Skipping already downloaded playlist: %s'
1831 % (ie_result.get('title') or ie_result.get('id')))
1832 return
1833
1834 self._playlist_level += 1
1835 self._playlist_urls.add(webpage_url)
1836 self._fill_common_fields(ie_result, False)
1837 self._sanitize_thumbnails(ie_result)
1838 try:
1839 return self.__process_playlist(ie_result, download)
1840 finally:
1841 self._playlist_level -= 1
1842 if not self._playlist_level:
1843 self._playlist_urls.clear()
1844 elif result_type == 'compat_list':
1845 self.report_warning(
1846 'Extractor %s returned a compat_list result. '
1847 'It needs to be updated.' % ie_result.get('extractor'))
1848
1849 def _fixup(r):
1850 self.add_extra_info(r, {
1851 'extractor': ie_result['extractor'],
1852 'webpage_url': ie_result['webpage_url'],
1853 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1854 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1855 'extractor_key': ie_result['extractor_key'],
1856 })
1857 return r
1858 ie_result['entries'] = [
1859 self.process_ie_result(_fixup(r), download, extra_info)
1860 for r in ie_result['entries']
1861 ]
1862 return ie_result
1863 else:
1864 raise Exception('Invalid result type: %s' % result_type)
1865
1866 def _ensure_dir_exists(self, path):
1867 return make_dir(path, self.report_error)
1868
1869 @staticmethod
1870 def _playlist_infodict(ie_result, strict=False, **kwargs):
1871 info = {
1872 'playlist_count': ie_result.get('playlist_count'),
1873 'playlist': ie_result.get('title') or ie_result.get('id'),
1874 'playlist_id': ie_result.get('id'),
1875 'playlist_title': ie_result.get('title'),
1876 'playlist_uploader': ie_result.get('uploader'),
1877 'playlist_uploader_id': ie_result.get('uploader_id'),
1878 **kwargs,
1879 }
1880 if strict:
1881 return info
1882 if ie_result.get('webpage_url'):
1883 info.update({
1884 'webpage_url': ie_result['webpage_url'],
1885 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1886 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1887 })
1888 return {
1889 **info,
1890 'playlist_index': 0,
1891 '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1892 'extractor': ie_result['extractor'],
1893 'extractor_key': ie_result['extractor_key'],
1894 }
1895
1896 def __process_playlist(self, ie_result, download):
1897 """Process each entry in the playlist"""
1898 assert ie_result['_type'] in ('playlist', 'multi_video')
1899
1900 common_info = self._playlist_infodict(ie_result, strict=True)
1901 title = common_info.get('playlist') or '<Untitled>'
1902 if self._match_entry(common_info, incomplete=True) is not None:
1903 return
1904 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
1905
1906 all_entries = PlaylistEntries(self, ie_result)
1907 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1908
1909 lazy = self.params.get('lazy_playlist')
1910 if lazy:
1911 resolved_entries, n_entries = [], 'N/A'
1912 ie_result['requested_entries'], ie_result['entries'] = None, None
1913 else:
1914 entries = resolved_entries = list(entries)
1915 n_entries = len(resolved_entries)
1916 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1917 if not ie_result.get('playlist_count'):
1918 # Better to do this after potentially exhausting entries
1919 ie_result['playlist_count'] = all_entries.get_full_count()
1920
1921 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1922 ie_copy = collections.ChainMap(ie_result, extra)
1923
1924 _infojson_written = False
1925 write_playlist_files = self.params.get('allow_playlist_files', True)
1926 if write_playlist_files and self.params.get('list_thumbnails'):
1927 self.list_thumbnails(ie_result)
1928 if write_playlist_files and not self.params.get('simulate'):
1929 _infojson_written = self._write_info_json(
1930 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1931 if _infojson_written is None:
1932 return
1933 if self._write_description('playlist', ie_result,
1934 self.prepare_filename(ie_copy, 'pl_description')) is None:
1935 return
1936 # TODO: This should be passed to ThumbnailsConvertor if necessary
1937 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1938
1939 if lazy:
1940 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1941 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1942 elif self.params.get('playlistreverse'):
1943 entries.reverse()
1944 elif self.params.get('playlistrandom'):
1945 random.shuffle(entries)
1946
1947 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
1948 f'{format_field(ie_result, "playlist_count", " of %s")}')
1949
1950 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1951 if self.params.get('extract_flat') == 'discard_in_playlist':
1952 keep_resolved_entries = ie_result['_type'] != 'playlist'
1953 if keep_resolved_entries:
1954 self.write_debug('The information of all playlist entries will be held in memory')
1955
1956 failures = 0
1957 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1958 for i, (playlist_index, entry) in enumerate(entries):
1959 if lazy:
1960 resolved_entries.append((playlist_index, entry))
1961 if not entry:
1962 continue
1963
1964 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
1965 if not lazy and 'playlist-index' in self.params['compat_opts']:
1966 playlist_index = ie_result['requested_entries'][i]
1967
1968 entry_copy = collections.ChainMap(entry, {
1969 **common_info,
1970 'n_entries': int_or_none(n_entries),
1971 'playlist_index': playlist_index,
1972 'playlist_autonumber': i + 1,
1973 })
1974
1975 if self._match_entry(entry_copy, incomplete=True) is not None:
1976 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
1977 resolved_entries[i] = (playlist_index, NO_DEFAULT)
1978 continue
1979
1980 self.to_screen('[download] Downloading item %s of %s' % (
1981 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1982
1983 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
1984 'playlist_index': playlist_index,
1985 'playlist_autonumber': i + 1,
1986 }, extra))
1987 if not entry_result:
1988 failures += 1
1989 if failures >= max_failures:
1990 self.report_error(
1991 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
1992 break
1993 if keep_resolved_entries:
1994 resolved_entries[i] = (playlist_index, entry_result)
1995
1996 # Update with processed data
1997 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
1998 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
1999 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
2000 # Do not set for full playlist
2001 ie_result.pop('requested_entries')
2002
2003 # Write the updated info to json
2004 if _infojson_written is True and self._write_info_json(
2005 'updated playlist', ie_result,
2006 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
2007 return
2008
2009 ie_result = self.run_all_pps('playlist', ie_result)
2010 self.to_screen(f'[download] Finished downloading playlist: {title}')
2011 return ie_result
2012
2013 @_handle_extraction_exceptions
2014 def __process_iterable_entry(self, entry, download, extra_info):
2015 return self.process_ie_result(
2016 entry, download=download, extra_info=extra_info)
2017
2018 def _build_format_filter(self, filter_spec):
2019 " Returns a function to filter the formats according to the filter_spec "
2020
2021 OPERATORS = {
2022 '<': operator.lt,
2023 '<=': operator.le,
2024 '>': operator.gt,
2025 '>=': operator.ge,
2026 '=': operator.eq,
2027 '!=': operator.ne,
2028 }
2029 operator_rex = re.compile(r'''(?x)\s*
2030 (?P<key>[\w.-]+)\s*
2031 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
2032 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2033 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
2034 m = operator_rex.fullmatch(filter_spec)
2035 if m:
2036 try:
2037 comparison_value = int(m.group('value'))
2038 except ValueError:
2039 comparison_value = parse_filesize(m.group('value'))
2040 if comparison_value is None:
2041 comparison_value = parse_filesize(m.group('value') + 'B')
2042 if comparison_value is None:
2043 raise ValueError(
2044 'Invalid value %r in format specification %r' % (
2045 m.group('value'), filter_spec))
2046 op = OPERATORS[m.group('op')]
2047
2048 if not m:
2049 STR_OPERATORS = {
2050 '=': operator.eq,
2051 '^=': lambda attr, value: attr.startswith(value),
2052 '$=': lambda attr, value: attr.endswith(value),
2053 '*=': lambda attr, value: value in attr,
2054 '~=': lambda attr, value: value.search(attr) is not None
2055 }
2056 str_operator_rex = re.compile(r'''(?x)\s*
2057 (?P<key>[a-zA-Z0-9._-]+)\s*
2058 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
2059 (?P<quote>["'])?
2060 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2061 (?(quote)(?P=quote))\s*
2062 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
2063 m = str_operator_rex.fullmatch(filter_spec)
2064 if m:
2065 if m.group('op') == '~=':
2066 comparison_value = re.compile(m.group('value'))
2067 else:
2068 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2069 str_op = STR_OPERATORS[m.group('op')]
2070 if m.group('negation'):
2071 op = lambda attr, value: not str_op(attr, value)
2072 else:
2073 op = str_op
2074
2075 if not m:
2076 raise SyntaxError('Invalid filter specification %r' % filter_spec)
2077
2078 def _filter(f):
2079 actual_value = f.get(m.group('key'))
2080 if actual_value is None:
2081 return m.group('none_inclusive')
2082 return op(actual_value, comparison_value)
2083 return _filter
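# Illustrative usage (hypothetical formats): the returned predicate is applied
# to each format dict, e.g.
#   accept = self._build_format_filter('height<=720')
#   accept({'height': 480})   # -> True
#   accept({'height': 1080})  # -> False
#   accept({})                # -> None (falsy; use 'height<=720?' to include)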
2084
2085 def _check_formats(self, formats):
2086 for f in formats:
2087 self.to_screen('[info] Testing format %s' % f['format_id'])
2088 path = self.get_output_path('temp')
2089 if not self._ensure_dir_exists(f'{path}/'):
2090 continue
2091 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2092 temp_file.close()
2093 try:
2094 success, _ = self.dl(temp_file.name, f, test=True)
2095 except (DownloadError, OSError, ValueError) + network_exceptions:
2096 success = False
2097 finally:
2098 if os.path.exists(temp_file.name):
2099 try:
2100 os.remove(temp_file.name)
2101 except OSError:
2102 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2103 if success:
2104 yield f
2105 else:
2106 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2107
2108 def _default_format_spec(self, info_dict, download=True):
2109
2110 def can_merge():
2111 merger = FFmpegMergerPP(self)
2112 return merger.available and merger.can_merge()
2113
2114 prefer_best = (
2115 not self.params.get('simulate')
2116 and download
2117 and (
2118 not can_merge()
2119 or info_dict.get('is_live') and not self.params.get('live_from_start')
2120 or self.params['outtmpl']['default'] == '-'))
2121 compat = (
2122 prefer_best
2123 or self.params.get('allow_multiple_audio_streams', False)
2124 or 'format-spec' in self.params['compat_opts'])
2125
2126 return (
2127 'best/bestvideo+bestaudio' if prefer_best
2128 else 'bestvideo*+bestaudio/best' if not compat
2129 else 'bestvideo+bestaudio/best')
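# For example (illustrative): with a working ffmpeg and a normal file target
# this resolves to 'bestvideo*+bestaudio/best'; when merging is unavailable
# (e.g. writing to stdout or a live stream without --live-from-start) it
# falls back to 'best/bestvideo+bestaudio'.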
2130
2131 def build_format_selector(self, format_spec):
2132 def syntax_error(note, start):
2133 message = (
2134 'Invalid format specification: '
2135 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2136 return SyntaxError(message)
2137
2138 PICKFIRST = 'PICKFIRST'
2139 MERGE = 'MERGE'
2140 SINGLE = 'SINGLE'
2141 GROUP = 'GROUP'
2142 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2143
2144 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2145 'video': self.params.get('allow_multiple_video_streams', False)}
2146
2147 def _parse_filter(tokens):
2148 filter_parts = []
2149 for type, string_, start, _, _ in tokens:
2150 if type == tokenize.OP and string_ == ']':
2151 return ''.join(filter_parts)
2152 else:
2153 filter_parts.append(string_)
2154
2155 def _remove_unused_ops(tokens):
2156 # Remove operators that we don't use and join them with the surrounding strings.
2157 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2158 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2159 last_string, last_start, last_end, last_line = None, None, None, None
2160 for type, string_, start, end, line in tokens:
2161 if type == tokenize.OP and string_ == '[':
2162 if last_string:
2163 yield tokenize.NAME, last_string, last_start, last_end, last_line
2164 last_string = None
2165 yield type, string_, start, end, line
2166 # everything inside brackets will be handled by _parse_filter
2167 for type, string_, start, end, line in tokens:
2168 yield type, string_, start, end, line
2169 if type == tokenize.OP and string_ == ']':
2170 break
2171 elif type == tokenize.OP and string_ in ALLOWED_OPS:
2172 if last_string:
2173 yield tokenize.NAME, last_string, last_start, last_end, last_line
2174 last_string = None
2175 yield type, string_, start, end, line
2176 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2177 if not last_string:
2178 last_string = string_
2179 last_start = start
2180 last_end = end
2181 else:
2182 last_string += string_
2183 if last_string:
2184 yield tokenize.NAME, last_string, last_start, last_end, last_line
2185
2186 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2187 selectors = []
2188 current_selector = None
2189 for type, string_, start, _, _ in tokens:
2190 # ENCODING is only defined in python 3.x
2191 if type == getattr(tokenize, 'ENCODING', None):
2192 continue
2193 elif type in [tokenize.NAME, tokenize.NUMBER]:
2194 current_selector = FormatSelector(SINGLE, string_, [])
2195 elif type == tokenize.OP:
2196 if string_ == ')':
2197 if not inside_group:
2198 # ')' will be handled by the parentheses group
2199 tokens.restore_last_token()
2200 break
2201 elif inside_merge and string_ in ['/', ',']:
2202 tokens.restore_last_token()
2203 break
2204 elif inside_choice and string_ == ',':
2205 tokens.restore_last_token()
2206 break
2207 elif string_ == ',':
2208 if not current_selector:
2209 raise syntax_error('"," must follow a format selector', start)
2210 selectors.append(current_selector)
2211 current_selector = None
2212 elif string_ == '/':
2213 if not current_selector:
2214 raise syntax_error('"/" must follow a format selector', start)
2215 first_choice = current_selector
2216 second_choice = _parse_format_selection(tokens, inside_choice=True)
2217 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2218 elif string_ == '[':
2219 if not current_selector:
2220 current_selector = FormatSelector(SINGLE, 'best', [])
2221 format_filter = _parse_filter(tokens)
2222 current_selector.filters.append(format_filter)
2223 elif string_ == '(':
2224 if current_selector:
2225 raise syntax_error('Unexpected "("', start)
2226 group = _parse_format_selection(tokens, inside_group=True)
2227 current_selector = FormatSelector(GROUP, group, [])
2228 elif string_ == '+':
2229 if not current_selector:
2230 raise syntax_error('Unexpected "+"', start)
2231 selector_1 = current_selector
2232 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2233 if not selector_2:
2234 raise syntax_error('Expected a selector', start)
2235 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2236 else:
2237 raise syntax_error(f'Operator not recognized: "{string_}"', start)
2238 elif type == tokenize.ENDMARKER:
2239 break
2240 if current_selector:
2241 selectors.append(current_selector)
2242 return selectors
2243
2244 def _merge(formats_pair):
2245 format_1, format_2 = formats_pair
2246
2247 formats_info = []
2248 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2249 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2250
2251 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2252 get_no_more = {'video': False, 'audio': False}
2253 for (i, fmt_info) in enumerate(formats_info):
2254 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2255 formats_info.pop(i)
2256 continue
2257 for aud_vid in ['audio', 'video']:
2258 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2259 if get_no_more[aud_vid]:
2260 formats_info.pop(i)
2261 break
2262 get_no_more[aud_vid] = True
2263
2264 if len(formats_info) == 1:
2265 return formats_info[0]
2266
2267 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2268 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2269
2270 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2271 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2272
2273 output_ext = get_compatible_ext(
2274 vcodecs=[f.get('vcodec') for f in video_fmts],
2275 acodecs=[f.get('acodec') for f in audio_fmts],
2276 vexts=[f['ext'] for f in video_fmts],
2277 aexts=[f['ext'] for f in audio_fmts],
2278 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2279 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2280
2281 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2282
2283 new_dict = {
2284 'requested_formats': formats_info,
2285 'format': '+'.join(filtered('format')),
2286 'format_id': '+'.join(filtered('format_id')),
2287 'ext': output_ext,
2288 'protocol': '+'.join(map(determine_protocol, formats_info)),
2289 'language': '+'.join(orderedSet(filtered('language'))) or None,
2290 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2291 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2292 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2293 }
2294
2295 if the_only_video:
2296 new_dict.update({
2297 'width': the_only_video.get('width'),
2298 'height': the_only_video.get('height'),
2299 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2300 'fps': the_only_video.get('fps'),
2301 'dynamic_range': the_only_video.get('dynamic_range'),
2302 'vcodec': the_only_video.get('vcodec'),
2303 'vbr': the_only_video.get('vbr'),
2304 'stretched_ratio': the_only_video.get('stretched_ratio'),
2305 'aspect_ratio': the_only_video.get('aspect_ratio'),
2306 })
2307
2308 if the_only_audio:
2309 new_dict.update({
2310 'acodec': the_only_audio.get('acodec'),
2311 'abr': the_only_audio.get('abr'),
2312 'asr': the_only_audio.get('asr'),
2313 'audio_channels': the_only_audio.get('audio_channels')
2314 })
2315
2316 return new_dict
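# Sketch (assumed format IDs): merging a video-only format '137' with an
# audio-only format '140' yields roughly
#   {'format_id': '137+140', 'requested_formats': [f137, f140],
#    'ext': <from get_compatible_ext()>, 'protocol': 'https+https', ...}
# plus the width/height/vcodec of the only video and acodec/abr of the only audio.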
2317
2318 def _check_formats(formats):
2319 if (self.params.get('check_formats') is not None
2320 or self.params.get('allow_unplayable_formats')):
2321 yield from formats
2322 return
2323 elif self.params.get('check_formats') == 'selected':
2324 yield from self._check_formats(formats)
2325 return
2326
2327 for f in formats:
2328 if f.get('has_drm'):
2329 yield from self._check_formats([f])
2330 else:
2331 yield f
2332
2333 def _build_selector_function(selector):
2334 if isinstance(selector, list): # ,
2335 fs = [_build_selector_function(s) for s in selector]
2336
2337 def selector_function(ctx):
2338 for f in fs:
2339 yield from f(ctx)
2340 return selector_function
2341
2342 elif selector.type == GROUP: # ()
2343 selector_function = _build_selector_function(selector.selector)
2344
2345 elif selector.type == PICKFIRST: # /
2346 fs = [_build_selector_function(s) for s in selector.selector]
2347
2348 def selector_function(ctx):
2349 for f in fs:
2350 picked_formats = list(f(ctx))
2351 if picked_formats:
2352 return picked_formats
2353 return []
2354
2355 elif selector.type == MERGE: # +
2356 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2357
2358 def selector_function(ctx):
2359 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2360 yield _merge(pair)
2361
2362 elif selector.type == SINGLE: # atom
2363 format_spec = selector.selector or 'best'
2364
2365 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2366 if format_spec == 'all':
2367 def selector_function(ctx):
2368 yield from _check_formats(ctx['formats'][::-1])
2369 elif format_spec == 'mergeall':
2370 def selector_function(ctx):
2371 formats = list(_check_formats(
2372 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2373 if not formats:
2374 return
2375 merged_format = formats[-1]
2376 for f in formats[-2::-1]:
2377 merged_format = _merge((merged_format, f))
2378 yield merged_format
2379
2380 else:
2381 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2382 mobj = re.match(
2383 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2384 format_spec)
2385 if mobj is not None:
2386 format_idx = int_or_none(mobj.group('n'), default=1)
2387 format_reverse = mobj.group('bw')[0] == 'b'
2388 format_type = (mobj.group('type') or [None])[0]
2389 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2390 format_modified = mobj.group('mod') is not None
2391
2392 format_fallback = not format_type and not format_modified # for b, w
2393 _filter_f = (
2394 (lambda f: f.get('%scodec' % format_type) != 'none')
2395 if format_type and format_modified # bv*, ba*, wv*, wa*
2396 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2397 if format_type # bv, ba, wv, wa
2398 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2399 if not format_modified # b, w
2400 else lambda f: True) # b*, w*
2401 filter_f = lambda f: _filter_f(f) and (
2402 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2403 else:
2404 if format_spec in self._format_selection_exts['audio']:
2405 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2406 elif format_spec in self._format_selection_exts['video']:
2407 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2408 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2409 elif format_spec in self._format_selection_exts['storyboards']:
2410 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2411 else:
2412 filter_f = lambda f: f.get('format_id') == format_spec # id
2413
2414 def selector_function(ctx):
2415 formats = list(ctx['formats'])
2416 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2417 if not matches:
2418 if format_fallback and ctx['incomplete_formats']:
2419 # for extractors with incomplete formats (audio only (soundcloud)
2420 # or video only (imgur)) best/worst will fall back to
2421 # best/worst {video,audio}-only format
2422 matches = formats
2423 elif separate_fallback and not ctx['has_merged_format']:
2424 # for compatibility with youtube-dl when there is no pre-merged format
2425 matches = list(filter(separate_fallback, formats))
2426 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2427 try:
2428 yield matches[format_idx - 1]
2429 except LazyList.IndexError:
2430 return
2431
2432 filters = [self._build_format_filter(f) for f in selector.filters]
2433
2434 def final_selector(ctx):
2435 ctx_copy = dict(ctx)
2436 for _filter in filters:
2437 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2438 return selector_function(ctx_copy)
2439 return final_selector
2440
2441 stream = io.BytesIO(format_spec.encode())
2442 try:
2443 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2444 except tokenize.TokenError:
2445 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2446
2447 class TokenIterator:
2448 def __init__(self, tokens):
2449 self.tokens = tokens
2450 self.counter = 0
2451
2452 def __iter__(self):
2453 return self
2454
2455 def __next__(self):
2456 if self.counter >= len(self.tokens):
2457 raise StopIteration()
2458 value = self.tokens[self.counter]
2459 self.counter += 1
2460 return value
2461
2462 next = __next__
2463
2464 def restore_last_token(self):
2465 self.counter -= 1
2466
2467 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2468 return _build_selector_function(parsed_selector)
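# Illustrative use (sketch): a selector built here is later called with a ctx
# dict, mirroring process_video_result() below:
#   selector = self.build_format_selector('bv*[height<=1080]+ba/b')
#   chosen = list(selector({'formats': formats,
#                           'has_merged_format': False,
#                           'incomplete_formats': False}))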
2469
2470 def _calc_headers(self, info_dict):
2471 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2472 clean_headers(res)
2473 cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
2474 if cookies:
2475 encoder = LenientSimpleCookie()
2476 values = []
2477 for cookie in cookies:
2478 _, value = encoder.value_encode(cookie.value)
2479 values.append(f'{cookie.name}={value}')
2480 if cookie.domain:
2481 values.append(f'Domain={cookie.domain}')
2482 if cookie.path:
2483 values.append(f'Path={cookie.path}')
2484 if cookie.secure:
2485 values.append('Secure')
2486 if cookie.expires:
2487 values.append(f'Expires={cookie.expires}')
2488 if cookie.version:
2489 values.append(f'Version={cookie.version}')
2490 info_dict['cookies'] = '; '.join(values)
2491
2492 if 'X-Forwarded-For' not in res:
2493 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2494 if x_forwarded_for_ip:
2495 res['X-Forwarded-For'] = x_forwarded_for_ip
2496
2497 return res
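# Illustrative example (assumed values): with params['http_headers'] containing
# {'User-Agent': 'UA'} and a format carrying {'Referer': 'https://example.com'},
# the merged result holds both headers, while matching cookies are serialized
# into info_dict['cookies'] (e.g. 'SID=abc; Domain=.example.com; Path=/')
# instead of being placed in the header dict.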
2498
2499 def _calc_cookies(self, url):
2500 self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2501 return self.cookiejar.get_cookie_header(url)
2502
2503 def _sort_thumbnails(self, thumbnails):
2504 thumbnails.sort(key=lambda t: (
2505 t.get('preference') if t.get('preference') is not None else -1,
2506 t.get('width') if t.get('width') is not None else -1,
2507 t.get('height') if t.get('height') is not None else -1,
2508 t.get('id') if t.get('id') is not None else '',
2509 t.get('url')))
2510
2511 def _sanitize_thumbnails(self, info_dict):
2512 thumbnails = info_dict.get('thumbnails')
2513 if thumbnails is None:
2514 thumbnail = info_dict.get('thumbnail')
2515 if thumbnail:
2516 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2517 if not thumbnails:
2518 return
2519
2520 def check_thumbnails(thumbnails):
2521 for t in thumbnails:
2522 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2523 try:
2524 self.urlopen(HEADRequest(t['url']))
2525 except network_exceptions as err:
2526 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2527 continue
2528 yield t
2529
2530 self._sort_thumbnails(thumbnails)
2531 for i, t in enumerate(thumbnails):
2532 if t.get('id') is None:
2533 t['id'] = '%d' % i
2534 if t.get('width') and t.get('height'):
2535 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2536 t['url'] = sanitize_url(t['url'])
2537
2538 if self.params.get('check_formats') is True:
2539 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2540 else:
2541 info_dict['thumbnails'] = thumbnails
2542
2543 def _fill_common_fields(self, info_dict, final=True):
2544 # TODO: move sanitization here
2545 if final:
2546 title = info_dict['fulltitle'] = info_dict.get('title')
2547 if not title:
2548 if title == '':
2549 self.write_debug('Extractor gave empty title. Creating a generic title')
2550 else:
2551 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2552 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2553
2554 if info_dict.get('duration') is not None:
2555 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2556
2557 for ts_key, date_key in (
2558 ('timestamp', 'upload_date'),
2559 ('release_timestamp', 'release_date'),
2560 ('modified_timestamp', 'modified_date'),
2561 ):
2562 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2563 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2564 # see http://bugs.python.org/issue1646728)
2565 with contextlib.suppress(ValueError, OverflowError, OSError):
2566 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2567 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2568
2569 live_keys = ('is_live', 'was_live')
2570 live_status = info_dict.get('live_status')
2571 if live_status is None:
2572 for key in live_keys:
2573 if info_dict.get(key) is False:
2574 continue
2575 if info_dict.get(key):
2576 live_status = key
2577 break
2578 if all(info_dict.get(key) is False for key in live_keys):
2579 live_status = 'not_live'
2580 if live_status:
2581 info_dict['live_status'] = live_status
2582 for key in live_keys:
2583 if info_dict.get(key) is None:
2584 info_dict[key] = (live_status == key)
2585 if live_status == 'post_live':
2586 info_dict['was_live'] = True
2587
2588 # Auto generate title fields corresponding to the *_number fields when missing
2589 # in order to always have clean titles. This is very common for TV series.
2590 for field in ('chapter', 'season', 'episode'):
2591 if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2592 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
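# Worked example (illustrative): timestamp=1577836800 with no upload_date
# yields upload_date='20200101'; is_live=True without an explicit live_status
# sets live_status='is_live'; and episode_number=3 with no episode title
# produces episode='Episode 3'.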
2593
2594 def _raise_pending_errors(self, info):
2595 err = info.pop('__pending_error', None)
2596 if err:
2597 self.report_error(err, tb=False)
2598
2599 def sort_formats(self, info_dict):
2600 formats = self._get_formats(info_dict)
2601 formats.sort(key=FormatSorter(
2602 self, info_dict.get('_format_sort_fields') or []).calculate_preference)
2603
2604 def process_video_result(self, info_dict, download=True):
2605 assert info_dict.get('_type', 'video') == 'video'
2606 self._num_videos += 1
2607
2608 if 'id' not in info_dict:
2609 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2610 elif not info_dict.get('id'):
2611 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2612
2613 def report_force_conversion(field, field_not, conversion):
2614 self.report_warning(
2615 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2616 % (field, field_not, conversion))
2617
2618 def sanitize_string_field(info, string_field):
2619 field = info.get(string_field)
2620 if field is None or isinstance(field, str):
2621 return
2622 report_force_conversion(string_field, 'a string', 'string')
2623 info[string_field] = str(field)
2624
2625 def sanitize_numeric_fields(info):
2626 for numeric_field in self._NUMERIC_FIELDS:
2627 field = info.get(numeric_field)
2628 if field is None or isinstance(field, (int, float)):
2629 continue
2630 report_force_conversion(numeric_field, 'numeric', 'int')
2631 info[numeric_field] = int_or_none(field)
2632
2633 sanitize_string_field(info_dict, 'id')
2634 sanitize_numeric_fields(info_dict)
2635 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2636 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2637 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2638 self.report_warning('"duration" field is negative, there is an error in extractor')
2639
2640 chapters = info_dict.get('chapters') or []
2641 if chapters and chapters[0].get('start_time'):
2642 chapters.insert(0, {'start_time': 0})
2643
2644 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2645 for idx, (prev, current, next_) in enumerate(zip(
2646 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2647 if current.get('start_time') is None:
2648 current['start_time'] = prev.get('end_time')
2649 if not current.get('end_time'):
2650 current['end_time'] = next_.get('start_time')
2651 if not current.get('title'):
2652 current['title'] = f'<Untitled Chapter {idx}>'
2653
2654 if 'playlist' not in info_dict:
2655 # It isn't part of a playlist
2656 info_dict['playlist'] = None
2657 info_dict['playlist_index'] = None
2658
2659 self._sanitize_thumbnails(info_dict)
2660
2661 thumbnail = info_dict.get('thumbnail')
2662 thumbnails = info_dict.get('thumbnails')
2663 if thumbnail:
2664 info_dict['thumbnail'] = sanitize_url(thumbnail)
2665 elif thumbnails:
2666 info_dict['thumbnail'] = thumbnails[-1]['url']
2667
2668 if info_dict.get('display_id') is None and 'id' in info_dict:
2669 info_dict['display_id'] = info_dict['id']
2670
2671 self._fill_common_fields(info_dict)
2672
2673 for cc_kind in ('subtitles', 'automatic_captions'):
2674 cc = info_dict.get(cc_kind)
2675 if cc:
2676 for _, subtitle in cc.items():
2677 for subtitle_format in subtitle:
2678 if subtitle_format.get('url'):
2679 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2680 if subtitle_format.get('ext') is None:
2681 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2682
2683 automatic_captions = info_dict.get('automatic_captions')
2684 subtitles = info_dict.get('subtitles')
2685
2686 info_dict['requested_subtitles'] = self.process_subtitles(
2687 info_dict['id'], subtitles, automatic_captions)
2688
2689 formats = self._get_formats(info_dict)
2690
2691 # Backward compatibility with InfoExtractor._sort_formats
2692 field_preference = (formats or [{}])[0].pop('__sort_fields', None)
2693 if field_preference:
2694 info_dict['_format_sort_fields'] = field_preference
2695
2696 info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
2697 f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
2698 if not self.params.get('allow_unplayable_formats'):
2699 formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
2700
2701 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2702 self.report_warning(
2703 f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2704 'only images are available for download. Use --list-formats to see them'.capitalize())
2705
2706 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2707 if not get_from_start:
2708 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2709 if info_dict.get('is_live') and formats:
2710 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2711 if get_from_start and not formats:
2712 self.raise_no_formats(info_dict, msg=(
2713 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2714 'If you want to download from the current time, use --no-live-from-start'))
2715
2716 def is_wellformed(f):
2717 url = f.get('url')
2718 if not url:
2719 self.report_warning(
2720 '"url" field is missing or empty - skipping format, '
2721 'there is an error in extractor')
2722 return False
2723 if isinstance(url, bytes):
2724 sanitize_string_field(f, 'url')
2725 return True
2726
2727 # Filter out malformed formats for better extraction robustness
2728 formats = list(filter(is_wellformed, formats or []))
2729
2730 if not formats:
2731 self.raise_no_formats(info_dict)
2732
2733 for format in formats:
2734 sanitize_string_field(format, 'format_id')
2735 sanitize_numeric_fields(format)
2736 format['url'] = sanitize_url(format['url'])
2737 if format.get('ext') is None:
2738 format['ext'] = determine_ext(format['url']).lower()
2739 if format.get('protocol') is None:
2740 format['protocol'] = determine_protocol(format)
2741 if format.get('resolution') is None:
2742 format['resolution'] = self.format_resolution(format, default=None)
2743 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2744 format['dynamic_range'] = 'SDR'
2745 if format.get('aspect_ratio') is None:
2746 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
2747 if (not format.get('manifest_url') # For fragmented formats, "tbr" is often max bitrate and not average
2748 and info_dict.get('duration') and format.get('tbr')
2749 and not format.get('filesize') and not format.get('filesize_approx')):
2750 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
2751 format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))
2752
2753 # This is copied to http_headers by the above _calc_headers and can now be removed
2754 if '__x_forwarded_for_ip' in info_dict:
2755 del info_dict['__x_forwarded_for_ip']
2756
2757 self.sort_formats({
2758 'formats': formats,
2759 '_format_sort_fields': info_dict.get('_format_sort_fields')
2760 })
2761
2762 # Sanitize and group by format_id
2763 formats_dict = {}
2764 for i, format in enumerate(formats):
2765 if not format.get('format_id'):
2766 format['format_id'] = str(i)
2767 else:
2768 # Sanitize format_id from characters used in format selector expression
2769 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2770 formats_dict.setdefault(format['format_id'], []).append(format)
2771
2772 # Make sure all formats have unique format_id
2773 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2774 for format_id, ambiguous_formats in formats_dict.items():
2775 ambiguous_id = len(ambiguous_formats) > 1
2776 for i, format in enumerate(ambiguous_formats):
2777 if ambiguous_id:
2778 format['format_id'] = '%s-%d' % (format_id, i)
2779 # Ensure there is no conflict between id and ext in format selection
2780 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2781 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2782 format['format_id'] = 'f%s' % format['format_id']
2783
2784 if format.get('format') is None:
2785 format['format'] = '{id} - {res}{note}'.format(
2786 id=format['format_id'],
2787 res=self.format_resolution(format),
2788 note=format_field(format, 'format_note', ' (%s)'),
2789 )
2790
2791 if self.params.get('check_formats') is True:
2792 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2793
2794 if not formats or formats[0] is not info_dict:
2795 # only set the 'formats' field if the original info_dict lists them;
2796 # otherwise we end up with a circular reference: the first (and only)
2797 # element of the 'formats' field in info_dict would be info_dict itself,
2798 # which can't be exported to json
2799 info_dict['formats'] = formats
2800
2801 info_dict, _ = self.pre_process(info_dict)
2802
2803 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2804 return info_dict
2805
2806 self.post_extract(info_dict)
2807 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2808
2809 # The pre-processors may have modified the formats
2810 formats = self._get_formats(info_dict)
2811
2812 list_only = self.params.get('simulate') == 'list_only'
2813 interactive_format_selection = not list_only and self.format_selector == '-'
2814 if self.params.get('list_thumbnails'):
2815 self.list_thumbnails(info_dict)
2816 if self.params.get('listsubtitles'):
2817 if 'automatic_captions' in info_dict:
2818 self.list_subtitles(
2819 info_dict['id'], automatic_captions, 'automatic captions')
2820 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2821 if self.params.get('listformats') or interactive_format_selection:
2822 self.list_formats(info_dict)
2823 if list_only:
2824 # Without this printing, -F --print-json will not work
2825 self.__forced_printings(info_dict)
2826 return info_dict
2827
2828 format_selector = self.format_selector
2829 while True:
2830 if interactive_format_selection:
2831 req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
2832 + '(Press ENTER for default, or Ctrl+C to quit)'
2833 + self._format_screen(': ', self.Styles.EMPHASIS))
2834 try:
2835 format_selector = self.build_format_selector(req_format) if req_format else None
2836 except SyntaxError as err:
2837 self.report_error(err, tb=False, is_error=False)
2838 continue
2839
2840 if format_selector is None:
2841 req_format = self._default_format_spec(info_dict, download=download)
2842 self.write_debug(f'Default format spec: {req_format}')
2843 format_selector = self.build_format_selector(req_format)
2844
2845 formats_to_download = list(format_selector({
2846 'formats': formats,
2847 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2848 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
2849 or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
2850 }))
2851 if interactive_format_selection and not formats_to_download:
2852 self.report_error('Requested format is not available', tb=False, is_error=False)
2853 continue
2854 break
2855
2856 if not formats_to_download:
2857 if not self.params.get('ignore_no_formats_error'):
2858 raise ExtractorError(
2859 'Requested format is not available. Use --list-formats for a list of available formats',
2860 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2861 self.report_warning('Requested format is not available')
2862 # Process what we can, even without any available formats.
2863 formats_to_download = [{}]
2864
2865 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
2866 best_format, downloaded_formats = formats_to_download[-1], []
2867 if download:
2868 if best_format and requested_ranges:
2869 def to_screen(*msg):
2870 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2871
2872 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2873 (f['format_id'] for f in formats_to_download))
2874 if requested_ranges != ({}, ):
2875 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2876 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
2877 max_downloads_reached = False
2878
2879 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
2880 new_info = self._copy_infodict(info_dict)
2881 new_info.update(fmt)
2882 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2883 end_time = offset + min(chapter.get('end_time', duration), duration)
2884 # duration may not be accurate. So allow deviations <1sec
2885 if end_time == float('inf') or end_time > offset + duration + 1:
2886 end_time = None
2887 if chapter or offset:
2888 new_info.update({
2889 'section_start': offset + chapter.get('start_time', 0),
2890 'section_end': end_time,
2891 'section_title': chapter.get('title'),
2892 'section_number': chapter.get('index'),
2893 })
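# Editorial example: a requested range {'start_time': 10, 'end_time': 20} on a
# plain (non-sectioned) video yields section_start=10 and section_end=20; an
# end_time past duration+1 is treated as "until the end" (section_end=None)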
2894 downloaded_formats.append(new_info)
2895 try:
2896 self.process_info(new_info)
2897 except MaxDownloadsReached:
2898 max_downloads_reached = True
2899 self._raise_pending_errors(new_info)
2900 # Remove copied info
2901 for key, val in tuple(new_info.items()):
2902 if info_dict.get(key) == val:
2903 new_info.pop(key)
2904 if max_downloads_reached:
2905 break
2906
2907 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2908 assert write_archive.issubset({True, False, 'ignore'})
2909 if True in write_archive and False not in write_archive:
2910 self.record_download_archive(info_dict)
2911
2912 info_dict['requested_downloads'] = downloaded_formats
2913 info_dict = self.run_all_pps('after_video', info_dict)
2914 if max_downloads_reached:
2915 raise MaxDownloadsReached()
2916
2917 # We update the info dict with the selected best quality format (backwards compatibility)
2918 info_dict.update(best_format)
2919 return info_dict
2920
2921 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2922 """Select the requested subtitles and their format"""
2923 available_subs, normal_sub_langs = {}, []
2924 if normal_subtitles and self.params.get('writesubtitles'):
2925 available_subs.update(normal_subtitles)
2926 normal_sub_langs = tuple(normal_subtitles.keys())
2927 if automatic_captions and self.params.get('writeautomaticsub'):
2928 for lang, cap_info in automatic_captions.items():
2929 if lang not in available_subs:
2930 available_subs[lang] = cap_info
2931
2932 if not available_subs or (
2933 not self.params.get('writesubtitles')
2934 and not self.params.get('writeautomaticsub')):
2935 return None
2936
2937 all_sub_langs = tuple(available_subs.keys())
2938 if self.params.get('allsubtitles', False):
2939 requested_langs = all_sub_langs
2940 elif self.params.get('subtitleslangs', False):
2941 try:
2942 requested_langs = orderedSet_from_options(
2943 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2944 except re.error as e:
2945 raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}')
2946 else:
2947 requested_langs = LazyList(itertools.chain(
2948 ['en'] if 'en' in normal_sub_langs else [],
2949 filter(lambda f: f.startswith('en'), normal_sub_langs),
2950 ['en'] if 'en' in all_sub_langs else [],
2951 filter(lambda f: f.startswith('en'), all_sub_langs),
2952 normal_sub_langs, all_sub_langs,
2953 ))[:1]
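# Editorial note: when no languages are requested explicitly, this picks a
# single best match in order: exact 'en' manual subs, any 'en*' manual subs,
# exact 'en' from any source (incl. auto captions), any 'en*' from any source,
# then any manual subs, then anything available. With --sub-langs, each entry
# is matched as a regex, e.g. 'en.*,ja' or 'all,-live_chat'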
2954 if requested_langs:
2955 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
2956
2957 formats_query = self.params.get('subtitlesformat', 'best')
2958 formats_preference = formats_query.split('/') if formats_query else []
2959 subs = {}
2960 for lang in requested_langs:
2961 formats = available_subs.get(lang)
2962 if formats is None:
2963 self.report_warning(f'{lang} subtitles not available for {video_id}')
2964 continue
2965 for ext in formats_preference:
2966 if ext == 'best':
2967 f = formats[-1]
2968 break
2969 matches = list(filter(lambda f: f['ext'] == ext, formats))
2970 if matches:
2971 f = matches[-1]
2972 break
2973 else:
2974 f = formats[-1]
2975 self.report_warning(
2976 'No subtitle format found matching "%s" for language %s, '
2977 'using %s' % (formats_query, lang, f['ext']))
2978 subs[lang] = f
2979 return subs
2980
2981 def _forceprint(self, key, info_dict):
2982 if info_dict is None:
2983 return
2984 info_copy = info_dict.copy()
2985 info_copy.setdefault('filename', self.prepare_filename(info_dict))
2986 if info_dict.get('requested_formats') is not None:
2987 # For RTMP URLs, also include the playpath
2988 info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2989 elif info_dict.get('url'):
2990 info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2991 info_copy['formats_table'] = self.render_formats_table(info_dict)
2992 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2993 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2994 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2995
2996 def format_tmpl(tmpl):
2997 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
2998 if not mobj:
2999 return tmpl
3000
3001 fmt = '%({})s'
3002 if tmpl.startswith('{'):
3003 tmpl, fmt = f'.{tmpl}', '%({})j'
3004 if tmpl.endswith('='):
3005 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
3006 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
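# Editorial illustration (not part of the original source) of how the
# transformation above expands --print arguments into output templates:
#   'title'      -> '%(title)s'
#   'title,id'   -> '%(title)s\n%(id)s'
#   'title='     -> 'title = %(title)#j'
#   '{title,id}' -> '%(.{title,id})j'  (the named fields as a JSON dict)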
3007
3008 for tmpl in self.params['forceprint'].get(key, []):
3009 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
3010
3011 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
3012 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
3013 tmpl = format_tmpl(tmpl)
3014 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
3015 if self._ensure_dir_exists(filename):
3016 with open(filename, 'a', encoding='utf-8', newline='') as f:
3017 f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
3018
3019 return info_copy
3020
3021 def __forced_printings(self, info_dict, filename=None, incomplete=True):
3022 if (self.params.get('forcejson')
3023 or self.params['forceprint'].get('video')
3024 or self.params['print_to_file'].get('video')):
3025 self.post_extract(info_dict)
3026 if filename:
3027 info_dict['filename'] = filename
3028 info_copy = self._forceprint('video', info_dict)
3029
3030 def print_field(field, actual_field=None, optional=False):
3031 if actual_field is None:
3032 actual_field = field
3033 if self.params.get(f'force{field}') and (
3034 info_copy.get(field) is not None or (not optional and not incomplete)):
3035 self.to_stdout(info_copy[actual_field])
3036
3037 print_field('title')
3038 print_field('id')
3039 print_field('url', 'urls')
3040 print_field('thumbnail', optional=True)
3041 print_field('description', optional=True)
3042 print_field('filename')
3043 if self.params.get('forceduration') and info_copy.get('duration') is not None:
3044 self.to_stdout(formatSeconds(info_copy['duration']))
3045 print_field('format')
3046
3047 if self.params.get('forcejson'):
3048 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
3049
3050 def dl(self, name, info, subtitle=False, test=False):
3051 if not info.get('url'):
3052 self.raise_no_formats(info, True)
3053
3054 if test:
3055 verbose = self.params.get('verbose')
3056 params = {
3057 'test': True,
3058 'quiet': self.params.get('quiet') or not verbose,
3059 'verbose': verbose,
3060 'noprogress': not verbose,
3061 'nopart': True,
3062 'skip_unavailable_fragments': False,
3063 'keep_fragments': False,
3064 'overwrites': True,
3065 '_no_ytdl_file': True,
3066 }
3067 else:
3068 params = self.params
3069 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
3070 if not test:
3071 for ph in self._progress_hooks:
3072 fd.add_progress_hook(ph)
3073 urls = '", "'.join(
3074 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
3075 for f in info.get('requested_formats', []) or [info])
3076 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
3077
3078 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
3079 # But it may contain objects that are not deep-copyable
3080 new_info = self._copy_infodict(info)
3081 if new_info.get('http_headers') is None:
3082 new_info['http_headers'] = self._calc_headers(new_info)
3083 return fd.download(name, new_info, subtitle)
3084
3085 def existing_file(self, filepaths, *, default_overwrite=True):
3086 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
3087 if existing_files and not self.params.get('overwrites', default_overwrite):
3088 return existing_files[0]
3089
3090 for file in existing_files:
3091 self.report_file_delete(file)
3092 os.remove(file)
3093 return None
3094
3095 def process_info(self, info_dict):
3096 """Process a single resolved IE result. (Modifies it in-place)"""
3097
3098 assert info_dict.get('_type', 'video') == 'video'
3099 original_infodict = info_dict
3100
3101 if 'format' not in info_dict and 'ext' in info_dict:
3102 info_dict['format'] = info_dict['ext']
3103
3104 if self._match_entry(info_dict) is not None:
3105 info_dict['__write_download_archive'] = 'ignore'
3106 return
3107
3108 # Does nothing under normal operation - for backward compatibility of process_info
3109 self.post_extract(info_dict)
3110
3111 def replace_info_dict(new_info):
3112 nonlocal info_dict
3113 if new_info == info_dict:
3114 return
3115 info_dict.clear()
3116 info_dict.update(new_info)
3117
3118 new_info, _ = self.pre_process(info_dict, 'video')
3119 replace_info_dict(new_info)
3120 self._num_downloads += 1
3121
3122 # info_dict['_filename'] needs to be set for backward compatibility
3123 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3124 temp_filename = self.prepare_filename(info_dict, 'temp')
3125 files_to_move = {}
3126
3127 # Forced printings
3128 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
3129
3130 def check_max_downloads():
3131 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3132 raise MaxDownloadsReached()
3133
3134 if self.params.get('simulate'):
3135 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3136 check_max_downloads()
3137 return
3138
3139 if full_filename is None:
3140 return
3141 if not self._ensure_dir_exists(encodeFilename(full_filename)):
3142 return
3143 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
3144 return
3145
3146 if self._write_description('video', info_dict,
3147 self.prepare_filename(info_dict, 'description')) is None:
3148 return
3149
3150 sub_files = self._write_subtitles(info_dict, temp_filename)
3151 if sub_files is None:
3152 return
3153 files_to_move.update(dict(sub_files))
3154
3155 thumb_files = self._write_thumbnails(
3156 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3157 if thumb_files is None:
3158 return
3159 files_to_move.update(dict(thumb_files))
3160
3161 infofn = self.prepare_filename(info_dict, 'infojson')
3162 _infojson_written = self._write_info_json('video', info_dict, infofn)
3163 if _infojson_written:
3164 info_dict['infojson_filename'] = infofn
3165 # For backward compatibility, even though it was a private field
3166 info_dict['__infojson_filename'] = infofn
3167 elif _infojson_written is None:
3168 return
3169
3170 # Note: Annotations are deprecated
3171 annofn = None
3172 if self.params.get('writeannotations', False):
3173 annofn = self.prepare_filename(info_dict, 'annotation')
3174 if annofn:
3175 if not self._ensure_dir_exists(encodeFilename(annofn)):
3176 return
3177 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
3178 self.to_screen('[info] Video annotations are already present')
3179 elif not info_dict.get('annotations'):
3180 self.report_warning('There are no annotations to write.')
3181 else:
3182 try:
3183 self.to_screen('[info] Writing video annotations to: ' + annofn)
3184 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
3185 annofile.write(info_dict['annotations'])
3186 except (KeyError, TypeError):
3187 self.report_warning('There are no annotations to write.')
3188 except OSError:
3189 self.report_error('Cannot write annotations file: ' + annofn)
3190 return
3191
3192 # Write internet shortcut files
3193 def _write_link_file(link_type):
3194 url = try_get(info_dict['webpage_url'], iri_to_uri)
3195 if not url:
3196 self.report_warning(
3197 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3198 return True
3199 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3200 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3201 return False
3202 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3203 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3204 return True
3205 try:
3206 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3207 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3208 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3209 template_vars = {'url': url}
3210 if link_type == 'desktop':
3211 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3212 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3213 except OSError:
3214 self.report_error(f'Cannot write internet shortcut {linkfn}')
3215 return False
3216 return True
3217
3218 write_links = {
3219 'url': self.params.get('writeurllink'),
3220 'webloc': self.params.get('writewebloclink'),
3221 'desktop': self.params.get('writedesktoplink'),
3222 }
3223 if self.params.get('writelink'):
3224 link_type = ('webloc' if sys.platform == 'darwin'
3225 else 'desktop' if sys.platform.startswith('linux')
3226 else 'url')
3227 write_links[link_type] = True
3228
3229 if any(should_write and not _write_link_file(link_type)
3230 for link_type, should_write in write_links.items()):
3231 return
3232
3233 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3234 replace_info_dict(new_info)
3235
3236 if self.params.get('skip_download'):
3237 info_dict['filepath'] = temp_filename
3238 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3239 info_dict['__files_to_move'] = files_to_move
3240 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3241 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3242 else:
3243 # Download
3244 info_dict.setdefault('__postprocessors', [])
3245 try:
3246
3247 def existing_video_file(*filepaths):
3248 ext = info_dict.get('ext')
3249 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3250 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3251 default_overwrite=False)
3252 if file:
3253 info_dict['ext'] = os.path.splitext(file)[1][1:]
3254 return file
3255
3256 fd, success = None, True
3257 if info_dict.get('protocol') or info_dict.get('url'):
3258 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3259 if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3260 info_dict.get('section_start') or info_dict.get('section_end')):
3261 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3262 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3263 self.report_error(f'{msg}. Aborting')
3264 return
3265
3266 if info_dict.get('requested_formats') is not None:
3267 old_ext = info_dict['ext']
3268 if self.params.get('merge_output_format') is None:
3269 if (info_dict['ext'] == 'webm'
3270 and info_dict.get('thumbnails')
3271 # check with type instead of pp_key, __name__, or isinstance
3272 # since we don't want any custom PPs to trigger this
3273 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
3274 info_dict['ext'] = 'mkv'
3275 self.report_warning(
3276 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3277 new_ext = info_dict['ext']
3278
3279 def correct_ext(filename, ext=new_ext):
3280 if filename == '-':
3281 return filename
3282 filename_real_ext = os.path.splitext(filename)[1][1:]
3283 filename_wo_ext = (
3284 os.path.splitext(filename)[0]
3285 if filename_real_ext in (old_ext, new_ext)
3286 else filename)
3287 return f'{filename_wo_ext}.{ext}'
3288
3289 # Ensure filename always has a correct extension for successful merge
3290 full_filename = correct_ext(full_filename)
3291 temp_filename = correct_ext(temp_filename)
3292 dl_filename = existing_video_file(full_filename, temp_filename)
3293
3294 info_dict['__real_download'] = False
3295 # NOTE: Copy so that original format dicts are not modified
3296 info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
3297
3298 merger = FFmpegMergerPP(self)
3299 downloaded = []
3300 if dl_filename is not None:
3301 self.report_file_already_downloaded(dl_filename)
3302 elif fd:
3303 for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
3304 f['filepath'] = fname = prepend_extension(
3305 correct_ext(temp_filename, info_dict['ext']),
3306 'f%s' % f['format_id'], info_dict['ext'])
3307 downloaded.append(fname)
3308 info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
3309 success, real_download = self.dl(temp_filename, info_dict)
3310 info_dict['__real_download'] = real_download
3311 else:
3312 if self.params.get('allow_unplayable_formats'):
3313 self.report_warning(
3314 'You have requested merging of multiple formats '
3315 'while also allowing unplayable formats to be downloaded. '
3316 'The formats won\'t be merged to prevent data corruption.')
3317 elif not merger.available:
3318 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3319 if not self.params.get('ignoreerrors'):
3320 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3321 return
3322 self.report_warning(f'{msg}. The formats won\'t be merged')
3323
3324 if temp_filename == '-':
3325 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3326 else 'but the formats are incompatible for simultaneous download' if merger.available
3327 else 'but ffmpeg is not installed')
3328 self.report_warning(
3329 f'You have requested downloading multiple formats to stdout {reason}. '
3330 'The formats will be streamed one after the other')
3331 fname = temp_filename
3332 for f in info_dict['requested_formats']:
3333 new_info = dict(info_dict)
3334 del new_info['requested_formats']
3335 new_info.update(f)
3336 if temp_filename != '-':
3337 fname = prepend_extension(
3338 correct_ext(temp_filename, new_info['ext']),
3339 'f%s' % f['format_id'], new_info['ext'])
3340 if not self._ensure_dir_exists(fname):
3341 return
3342 f['filepath'] = fname
3343 downloaded.append(fname)
3344 partial_success, real_download = self.dl(fname, new_info)
3345 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3346 success = success and partial_success
3347
3348 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3349 info_dict['__postprocessors'].append(merger)
3350 info_dict['__files_to_merge'] = downloaded
3351 # Even if nothing was freshly downloaded, the merge itself happens only now, so count it as a real download
3352 info_dict['__real_download'] = True
3353 else:
3354 for file in downloaded:
3355 files_to_move[file] = None
3356 else:
3357 # Just a single file
3358 dl_filename = existing_video_file(full_filename, temp_filename)
3359 if dl_filename is None or dl_filename == temp_filename:
3360 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3361 # So we should try to resume the download
3362 success, real_download = self.dl(temp_filename, info_dict)
3363 info_dict['__real_download'] = real_download
3364 else:
3365 self.report_file_already_downloaded(dl_filename)
3366
3367 dl_filename = dl_filename or temp_filename
3368 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3369
3370 except network_exceptions as err:
3371 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3372 return
3373 except OSError as err:
3374 raise UnavailableVideoError(err)
3375 except (ContentTooShortError, ) as err:
3376 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3377 return
3378
3379 self._raise_pending_errors(info_dict)
3380 if success and full_filename != '-':
3381
3382 def fixup():
3383 do_fixup = True
3384 fixup_policy = self.params.get('fixup')
3385 vid = info_dict['id']
3386
3387 if fixup_policy in ('ignore', 'never'):
3388 return
3389 elif fixup_policy == 'warn':
3390 do_fixup = 'warn'
3391 elif fixup_policy != 'force':
3392 assert fixup_policy in ('detect_or_warn', None)
3393 if not info_dict.get('__real_download'):
3394 do_fixup = False
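# Editorial summary of the policy above: 'never'/'ignore' -> skip fixups
# entirely; 'warn' -> only emit warnings; 'detect_or_warn' (the default) ->
# apply fixups, but only to freshly downloaded files; 'force' -> always apply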
3395
3396 def ffmpeg_fixup(cndn, msg, cls):
3397 if not (do_fixup and cndn):
3398 return
3399 elif do_fixup == 'warn':
3400 self.report_warning(f'{vid}: {msg}')
3401 return
3402 pp = cls(self)
3403 if pp.available:
3404 info_dict['__postprocessors'].append(pp)
3405 else:
3406 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3407
3408 stretched_ratio = info_dict.get('stretched_ratio')
3409 ffmpeg_fixup(stretched_ratio not in (1, None),
3410 f'Non-uniform pixel ratio {stretched_ratio}',
3411 FFmpegFixupStretchedPP)
3412
3413 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3414 downloader = downloader.FD_NAME if downloader else None
3415
3416 ext = info_dict.get('ext')
3417 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3418 isinstance(pp, FFmpegVideoConvertorPP)
3419 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3420 ) for pp in self._pps['post_process'])
3421
3422 if not postprocessed_by_ffmpeg:
3423 ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
3424 'writing DASH m4a. Only some players support this container',
3425 FFmpegFixupM4aPP)
3426 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3427 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3428 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3429 FFmpegFixupM3u8PP)
3430 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
3431 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3432
3433 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3434 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3435
3436 fixup()
3437 try:
3438 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3439 except PostProcessingError as err:
3440 self.report_error('Postprocessing: %s' % str(err))
3441 return
3442 try:
3443 for ph in self._post_hooks:
3444 ph(info_dict['filepath'])
3445 except Exception as err:
3446 self.report_error('post hooks: %s' % str(err))
3447 return
3448 info_dict['__write_download_archive'] = True
3449
3450 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
3451 if self.params.get('force_write_download_archive'):
3452 info_dict['__write_download_archive'] = True
3453 check_max_downloads()
3454
3455 def __download_wrapper(self, func):
3456 @functools.wraps(func)
3457 def wrapper(*args, **kwargs):
3458 try:
3459 res = func(*args, **kwargs)
3460 except UnavailableVideoError as e:
3461 self.report_error(e)
3462 except DownloadCancelled as e:
3463 self.to_screen(f'[info] {e}')
3464 if not self.params.get('break_per_url'):
3465 raise
3466 self._num_downloads = 0
3467 else:
3468 if self.params.get('dump_single_json', False):
3469 self.post_extract(res)
3470 self.to_stdout(json.dumps(self.sanitize_info(res)))
3471 return wrapper
3472
3473 def download(self, url_list):
3474 """Download a given list of URLs."""
3475 url_list = variadic(url_list) # Passing a single URL is a common mistake
3476 outtmpl = self.params['outtmpl']['default']
3477 if (len(url_list) > 1
3478 and outtmpl != '-'
3479 and '%' not in outtmpl
3480 and self.params.get('max_downloads') != 1):
3481 raise SameFileError(outtmpl)
3482
3483 for url in url_list:
3484 self.__download_wrapper(self.extract_info)(
3485 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3486
3487 return self._download_retcode
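# A hedged usage sketch (editorial; mirrors the documented embedding pattern
# and is not part of the original source):
#
#   from yt_dlp import YoutubeDL
#
#   with YoutubeDL({'outtmpl': '%(id)s.%(ext)s'}) as ydl:
#       retcode = ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])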
3488
3489 def download_with_info_file(self, info_filename):
3490 with contextlib.closing(fileinput.FileInput(
3491 [info_filename], mode='r',
3492 openhook=fileinput.hook_encoded('utf-8'))) as f:
3493 # FileInput doesn't have a read method, so we can't call json.load directly
3494 infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
3495 for info in variadic(json.loads('\n'.join(f)))]
3496 for info in infos:
3497 self._load_cookies(info.get('cookies'), from_headers=False)
3498 self._load_cookies(traverse_obj(info.get('http_headers'), 'Cookie', casesense=False)) # compat
3499 try:
3500 self.__download_wrapper(self.process_ie_result)(info, download=True)
3501 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3502 if not isinstance(e, EntryNotInPlaylist):
3503 self.to_stderr('\r')
3504 webpage_url = info.get('webpage_url')
3505 if webpage_url is None:
3506 raise
3507 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3508 self.download([webpage_url])
3509 return self._download_retcode
3510
3511 @staticmethod
3512 def sanitize_info(info_dict, remove_private_keys=False):
3513 ''' Sanitize the infodict for converting to json '''
3514 if info_dict is None:
3515 return info_dict
3516 info_dict.setdefault('epoch', int(time.time()))
3517 info_dict.setdefault('_type', 'video')
3518 info_dict.setdefault('_version', {
3519 'version': __version__,
3520 'current_git_head': current_git_head(),
3521 'release_git_head': RELEASE_GIT_HEAD,
3522 'repository': REPOSITORY,
3523 })
3524
3525 if remove_private_keys:
3526 reject = lambda k, v: v is None or k.startswith('__') or k in {
3527 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3528 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3529 'playlist_autonumber', '_format_sort_fields',
3530 }
3531 else:
3532 reject = lambda k, v: False
3533
3534 def filter_fn(obj):
3535 if isinstance(obj, dict):
3536 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3537 elif isinstance(obj, (list, tuple, set, LazyList)):
3538 return list(map(filter_fn, obj))
3539 elif obj is None or isinstance(obj, (str, int, float, bool)):
3540 return obj
3541 else:
3542 return repr(obj)
3543
3544 return filter_fn(info_dict)
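# Editorial example: with remove_private_keys=True, None-valued fields, any
# '__'-prefixed key and bookkeeping keys such as 'requested_formats' are
# dropped, and any value that is not JSON-serializable (not a
# dict/list/str/int/float/bool/None) is replaced by its repr()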
3545
3546 @staticmethod
3547 def filter_requested_info(info_dict, actually_filter=True):
3548 ''' Alias of sanitize_info for backward compatibility '''
3549 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3550
3551 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3552 for filename in set(filter(None, files_to_delete)):
3553 if msg:
3554 self.to_screen(msg % filename)
3555 try:
3556 os.remove(filename)
3557 except OSError:
3558 self.report_warning(f'Unable to delete file {filename}')
3559 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3560 del info['__files_to_move'][filename]
3561
3562 @staticmethod
3563 def post_extract(info_dict):
3564 def actual_post_extract(info_dict):
3565 if info_dict.get('_type') in ('playlist', 'multi_video'):
3566 for video_dict in info_dict.get('entries', {}):
3567 actual_post_extract(video_dict or {})
3568 return
3569
3570 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3571 info_dict.update(post_extractor())
3572
3573 actual_post_extract(info_dict or {})
3574
3575 def run_pp(self, pp, infodict):
3576 files_to_delete = []
3577 if '__files_to_move' not in infodict:
3578 infodict['__files_to_move'] = {}
3579 try:
3580 files_to_delete, infodict = pp.run(infodict)
3581 except PostProcessingError as e:
3582 # Must be True and not 'only_download'
3583 if self.params.get('ignoreerrors') is True:
3584 self.report_error(e)
3585 return infodict
3586 raise
3587
3588 if not files_to_delete:
3589 return infodict
3590 if self.params.get('keepvideo', False):
3591 for f in files_to_delete:
3592 infodict['__files_to_move'].setdefault(f, '')
3593 else:
3594 self._delete_downloaded_files(
3595 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3596 return infodict
3597
3598 def run_all_pps(self, key, info, *, additional_pps=None):
3599 if key != 'video':
3600 self._forceprint(key, info)
3601 for pp in (additional_pps or []) + self._pps[key]:
3602 info = self.run_pp(pp, info)
3603 return info
3604
3605 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3606 info = dict(ie_info)
3607 info['__files_to_move'] = files_to_move or {}
3608 try:
3609 info = self.run_all_pps(key, info)
3610 except PostProcessingError as err:
3611 msg = f'Preprocessing: {err}'
3612 info.setdefault('__pending_error', msg)
3613 self.report_error(msg, is_error=False)
3614 return info, info.pop('__files_to_move', None)
3615
3616 def post_process(self, filename, info, files_to_move=None):
3617 """Run all the postprocessors on the given file."""
3618 info['filepath'] = filename
3619 info['__files_to_move'] = files_to_move or {}
3620 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3621 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3622 del info['__files_to_move']
3623 return self.run_all_pps('after_move', info)
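# Editorial note: a single video thus flows through the postprocessor groups
# roughly as pre_process -> after_filter -> video -> before_dl -> post_process
# (including the per-video __postprocessors) -> MoveFilesAfterDownload ->
# after_move, with after_video running once all its downloads have finished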
3624
3625 def _make_archive_id(self, info_dict):
3626 video_id = info_dict.get('id')
3627 if not video_id:
3628 return
3629 # Future-proof against any change in case
3630 # and backwards compatibility with prior versions
3631 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3632 if extractor is None:
3633 url = str_or_none(info_dict.get('url'))
3634 if not url:
3635 return
3636 # Try to find matching extractor for the URL and take its ie_key
3637 for ie_key, ie in self._ies.items():
3638 if ie.suitable(url):
3639 extractor = ie_key
3640 break
3641 else:
3642 return
3643 return make_archive_id(extractor, video_id)
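# Editorial note: make_archive_id produces entries of the form
# '<lowercased extractor key> <video id>', e.g. 'youtube dQw4w9WgXcQ', which
# is the line format used in the --download-archive file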
3644
3645 def in_download_archive(self, info_dict):
3646 if not self.archive:
3647 return False
3648
3649 vid_ids = [self._make_archive_id(info_dict)]
3650 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3651 return any(id_ in self.archive for id_ in vid_ids)
3652
3653 def record_download_archive(self, info_dict):
3654 fn = self.params.get('download_archive')
3655 if fn is None:
3656 return
3657 vid_id = self._make_archive_id(info_dict)
3658 assert vid_id
3659
3660 self.write_debug(f'Adding to archive: {vid_id}')
3661 if is_path_like(fn):
3662 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3663 archive_file.write(vid_id + '\n')
3664 self.archive.add(vid_id)
3665
3666 @staticmethod
3667 def format_resolution(format, default='unknown'):
3668 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3669 return 'audio only'
3670 if format.get('resolution') is not None:
3671 return format['resolution']
3672 if format.get('width') and format.get('height'):
3673 return '%dx%d' % (format['width'], format['height'])
3674 elif format.get('height'):
3675 return '%sp' % format['height']
3676 elif format.get('width'):
3677 return '%dx?' % format['width']
3678 return default
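# Editorial examples: {'width': 1920, 'height': 1080} -> '1920x1080';
# {'height': 720} -> '720p'; {'width': 640} -> '640x?';
# {'vcodec': 'none', 'acodec': 'mp4a'} -> 'audio only'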
3679
3680 def _list_format_headers(self, *headers):
3681 if self.params.get('listformats_table', True) is not False:
3682 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3683 return headers
3684
3685 def _format_note(self, fdict):
3686 res = ''
3687 if fdict.get('ext') in ['f4f', 'f4m']:
3688 res += '(unsupported)'
3689 if fdict.get('language'):
3690 if res:
3691 res += ' '
3692 res += '[%s]' % fdict['language']
3693 if fdict.get('format_note') is not None:
3694 if res:
3695 res += ' '
3696 res += fdict['format_note']
3697 if fdict.get('tbr') is not None:
3698 if res:
3699 res += ', '
3700 res += '%4dk' % fdict['tbr']
3701 if fdict.get('container') is not None:
3702 if res:
3703 res += ', '
3704 res += '%s container' % fdict['container']
3705 if (fdict.get('vcodec') is not None
3706 and fdict.get('vcodec') != 'none'):
3707 if res:
3708 res += ', '
3709 res += fdict['vcodec']
3710 if fdict.get('vbr') is not None:
3711 res += '@'
3712 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3713 res += 'video@'
3714 if fdict.get('vbr') is not None:
3715 res += '%4dk' % fdict['vbr']
3716 if fdict.get('fps') is not None:
3717 if res:
3718 res += ', '
3719 res += '%sfps' % fdict['fps']
3720 if fdict.get('acodec') is not None:
3721 if res:
3722 res += ', '
3723 if fdict['acodec'] == 'none':
3724 res += 'video only'
3725 else:
3726 res += '%-5s' % fdict['acodec']
3727 elif fdict.get('abr') is not None:
3728 if res:
3729 res += ', '
3730 res += 'audio'
3731 if fdict.get('abr') is not None:
3732 res += '@%3dk' % fdict['abr']
3733 if fdict.get('asr') is not None:
3734 res += ' (%5dHz)' % fdict['asr']
3735 if fdict.get('filesize') is not None:
3736 if res:
3737 res += ', '
3738 res += format_bytes(fdict['filesize'])
3739 elif fdict.get('filesize_approx') is not None:
3740 if res:
3741 res += ', '
3742 res += '~' + format_bytes(fdict['filesize_approx'])
3743 return res
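# Editorial example: {'tbr': 1000, 'vcodec': 'avc1', 'acodec': 'mp4a'} renders
# roughly as '1000k, avc1, mp4a' in this legacy (non-table) format listing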
3744
3745 def _get_formats(self, info_dict):
3746 if info_dict.get('formats') is None:
3747 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3748 return [info_dict]
3749 return []
3750 return info_dict['formats']
3751
3752 def render_formats_table(self, info_dict):
3753 formats = self._get_formats(info_dict)
3754 if not formats:
3755 return
3756 if self.params.get('listformats_table', True) is False:
3757 table = [
3758 [
3759 format_field(f, 'format_id'),
3760 format_field(f, 'ext'),
3761 self.format_resolution(f),
3762 self._format_note(f)
3763 ] for f in formats if (f.get('preference') or 0) >= -1000]
3764 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3765
3766 def simplified_codec(f, field):
3767 assert field in ('acodec', 'vcodec')
3768 codec = f.get(field)
3769 if not codec:
3770 return 'unknown'
3771 elif codec != 'none':
3772 return '.'.join(codec.split('.')[:4])
3773
3774 if field == 'vcodec' and f.get('acodec') == 'none':
3775 return 'images'
3776 elif field == 'acodec' and f.get('vcodec') == 'none':
3777 return ''
3778 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3779 self.Styles.SUPPRESS)
3780
3781 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3782 table = [
3783 [
3784 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3785 format_field(f, 'ext'),
3786 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3787 format_field(f, 'fps', '\t%d', func=round),
3788 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3789 format_field(f, 'audio_channels', '\t%s'),
3790 delim, (
3791 format_field(f, 'filesize', ' \t%s', func=format_bytes)
3792 or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
3793 or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
3794 None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
3795 format_field(f, 'tbr', '\t%dk', func=round),
3796 shorten_protocol_name(f.get('protocol', '')),
3797 delim,
3798 simplified_codec(f, 'vcodec'),
3799 format_field(f, 'vbr', '\t%dk', func=round),
3800 simplified_codec(f, 'acodec'),
3801 format_field(f, 'abr', '\t%dk', func=round),
3802 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3803 join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
3804 self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
3805 (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
3806 else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
3807 format_field(f, 'format_note'),
3808 format_field(f, 'container', ignore=(None, f.get('ext'))),
3809 delim=', '), delim=' '),
3810 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3811 header_line = self._list_format_headers(
3812 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3813 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3814
3815 return render_table(
3816 header_line, table, hide_empty=True,
3817 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3818
3819 def render_thumbnails_table(self, info_dict):
3820 thumbnails = list(info_dict.get('thumbnails') or [])
3821 if not thumbnails:
3822 return None
3823 return render_table(
3824 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3825 [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3826
3827 def render_subtitles_table(self, video_id, subtitles):
3828 def _row(lang, formats):
3829 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3830 if len(set(names)) == 1:
3831 names = [] if names[0] == 'unknown' else names[:1]
3832 return [lang, ', '.join(names), ', '.join(exts)]
3833
3834 if not subtitles:
3835 return None
3836 return render_table(
3837 self._list_format_headers('Language', 'Name', 'Formats'),
3838 [_row(lang, formats) for lang, formats in subtitles.items()],
3839 hide_empty=True)
3840
3841 def __list_table(self, video_id, name, func, *args):
3842 table = func(*args)
3843 if not table:
3844 self.to_screen(f'{video_id} has no {name}')
3845 return
3846 self.to_screen(f'[info] Available {name} for {video_id}:')
3847 self.to_stdout(table)
3848
3849 def list_formats(self, info_dict):
3850 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3851
3852 def list_thumbnails(self, info_dict):
3853 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3854
3855 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3856 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3857
3858 def print_debug_header(self):
3859 if not self.params.get('verbose'):
3860 return
3861
3862 from . import _IN_CLI # Must be delayed import
3863
3864 # These imports can be slow. So import them only as needed
3865 from .extractor.extractors import _LAZY_LOADER
3866 from .extractor.extractors import (
3867 _PLUGIN_CLASSES as plugin_ies,
3868 _PLUGIN_OVERRIDES as plugin_ie_overrides
3869 )
3870
3871 def get_encoding(stream):
3872 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3873 additional_info = []
3874 if os.environ.get('TERM', '').lower() == 'dumb':
3875 additional_info.append('dumb')
3876 if not supports_terminal_sequences(stream):
3877 from .utils import WINDOWS_VT_MODE # Must be imported locally
3878 additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
3879 if additional_info:
3880 ret = f'{ret} ({",".join(additional_info)})'
3881 return ret
3882
3883 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
3884 locale.getpreferredencoding(),
3885 sys.getfilesystemencoding(),
3886 self.get_encoding(),
3887 ', '.join(
3888 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3889 if stream is not None and key != 'console')
3890 )
3891
3892 logger = self.params.get('logger')
3893 if logger:
3894 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3895 write_debug(encoding_str)
3896 else:
3897 write_string(f'[debug] {encoding_str}\n', encoding=None)
3898 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3899
3900 source = detect_variant()
3901 if VARIANT not in (None, 'pip'):
3902 source += '*'
3903 klass = type(self)
3904 write_debug(join_nonempty(
3905 f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
3906 f'{CHANNEL}@{__version__}',
3907 f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
3908 '' if source == 'unknown' else f'({source})',
3909 '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
3910 delim=' '))
3911
3912 if not _IN_CLI:
3913 write_debug(f'params: {self.params}')
3914
3915 if not _LAZY_LOADER:
3916 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3917 write_debug('Lazy loading extractors is forcibly disabled')
3918 else:
3919 write_debug('Lazy loading extractors is disabled')
3920 if self.params['compat_opts']:
3921 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
3922
3923 if current_git_head():
3924 write_debug(f'Git HEAD: {current_git_head()}')
3925 write_debug(system_identifier())
3926
3927 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3928 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3929 if ffmpeg_features:
3930 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3931
3932 exe_versions['rtmpdump'] = rtmpdump_version()
3933 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3934 exe_str = ', '.join(
3935 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3936 ) or 'none'
3937 write_debug('exe versions: %s' % exe_str)
3938
3939 from .compat.compat_utils import get_package_info
3940 from .dependencies import available_dependencies
3941
3942 write_debug('Optional libraries: %s' % (', '.join(sorted({
3943 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
3944 })) or 'none'))
3945
3946 self._setup_opener()
3947 proxy_map = {}
3948 for handler in self._opener.handlers:
3949 if hasattr(handler, 'proxies'):
3950 proxy_map.update(handler.proxies)
3951 write_debug(f'Proxy map: {proxy_map}')
3952
3953 for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
3954 display_list = ['%s%s' % (
3955 klass.__name__, '' if klass.__name__ == name else f' as {name}')
3956 for name, klass in plugins.items()]
3957 if plugin_type == 'Extractor':
3958 display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
3959 for parent, plugins in plugin_ie_overrides.items())
3960 if not display_list:
3961 continue
3962 write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
3963
3964 plugin_dirs = plugin_directories()
3965 if plugin_dirs:
3966 write_debug(f'Plugin directories: {plugin_dirs}')
3967
3968 # Not implemented
3969 if False and self.params.get('call_home'):
3970 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
3971 write_debug('Public IP address: %s' % ipaddr)
3972 latest_version = self.urlopen(
3973 'https://yt-dl.org/latest/version').read().decode()
3974 if version_tuple(latest_version) > version_tuple(__version__):
3975 self.report_warning(
3976 'You are using an outdated version (newest version: %s)! '
3977 'See https://yt-dl.org/update if you need help updating.' %
3978 latest_version)
3979
3980 def _setup_opener(self):
3981 if hasattr(self, '_opener'):
3982 return
3983 timeout_val = self.params.get('socket_timeout')
3984 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3985 opts_proxy = self.params.get('proxy')
3986
3987 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3988 if opts_proxy is not None:
3989 if opts_proxy == '':
3990 proxies = {}
3991 else:
3992 proxies = {'http': opts_proxy, 'https': opts_proxy}
3993 else:
3994 proxies = urllib.request.getproxies()
3995 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3996 if 'http' in proxies and 'https' not in proxies:
3997 proxies['https'] = proxies['http']
3998 proxy_handler = PerRequestProxyHandler(proxies)
3999
4000 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
4001 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
4002 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
4003 redirect_handler = YoutubeDLRedirectHandler()
4004 data_handler = urllib.request.DataHandler()
4005
4006 # When passing our own FileHandler instance, build_opener won't add the
4007 # default FileHandler and allows us to disable the file protocol, which
4008 # can be used for malicious purposes (see
4009 # https://github.com/ytdl-org/youtube-dl/issues/8227)
4010 file_handler = urllib.request.FileHandler()
4011
4012 if not self.params.get('enable_file_urls'):
4013 def file_open(*args, **kwargs):
4014 raise urllib.error.URLError(
4015 'file:// URLs are explicitly disabled in yt-dlp for security reasons. '
4016 'Use --enable-file-urls to enable at your own risk.')
4017 file_handler.file_open = file_open
4018
4019 opener = urllib.request.build_opener(
4020 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
4021
4022 # Delete the default user-agent header, which would otherwise apply in
4023 # cases where our custom HTTP handler doesn't come into play
4024 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
4025 opener.addheaders = []
4026 self._opener = opener
4027
4028 @functools.cached_property
4029 def cookiejar(self):
4030 """Global cookiejar instance"""
4031 return load_cookies(
4032 self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
4033
4034 def urlopen(self, req):
4035 """ Start an HTTP download """
4036 if isinstance(req, str):
4037 req = sanitized_Request(req)
4038 return self._opener.open(req, timeout=self._socket_timeout)
4039
4040 def encode(self, s):
4041 if isinstance(s, bytes):
4042 return s # Already encoded
4043
4044 try:
4045 return s.encode(self.get_encoding())
4046 except UnicodeEncodeError as err:
4047 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
4048 raise
4049
4050 def get_encoding(self):
4051 encoding = self.params.get('encoding')
4052 if encoding is None:
4053 encoding = preferredencoding()
4054 return encoding
4055
4056 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
4057 ''' Write infojson and return True = written, 'exists' = already exists, False = skipped, None = error '''
4058 if overwrite is None:
4059 overwrite = self.params.get('overwrites', True)
4060 if not self.params.get('writeinfojson'):
4061 return False
4062 elif not infofn:
4063 self.write_debug(f'Skipping writing {label} infojson')
4064 return False
4065 elif not self._ensure_dir_exists(infofn):
4066 return None
4067 elif not overwrite and os.path.exists(infofn):
4068 self.to_screen(f'[info] {label.title()} metadata is already present')
4069 return 'exists'
4070
4071 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
4072 try:
4073 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
4074 return True
4075 except OSError:
4076 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
4077 return None
4078
4079 def _write_description(self, label, ie_result, descfn):
4080 ''' Write description and return True = written, False = skipped, None = error '''
4081 if not self.params.get('writedescription'):
4082 return False
4083 elif not descfn:
4084 self.write_debug(f'Skipping writing {label} description')
4085 return False
4086 elif not self._ensure_dir_exists(descfn):
4087 return None
4088 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
4089 self.to_screen(f'[info] {label.title()} description is already present')
4090 elif ie_result.get('description') is None:
4091 self.to_screen(f'[info] There\'s no {label} description to write')
4092 return False
4093 else:
4094 try:
4095 self.to_screen(f'[info] Writing {label} description to: {descfn}')
4096 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
4097 descfile.write(ie_result['description'])
4098 except OSError:
4099 self.report_error(f'Cannot write {label} description file {descfn}')
4100 return None
4101 return True
4102
4103 def _write_subtitles(self, info_dict, filename):
4104 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
4105 ret = []
4106 subtitles = info_dict.get('requested_subtitles')
4107 if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
4108 # Subtitle download errors are already handled as non-fatal in the relevant IE,
4109 # so this silently continues when used with an IE that doesn't support them
4110 return ret
4111 elif not subtitles:
4112 self.to_screen('[info] There are no subtitles for the requested languages')
4113 return ret
4114 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
4115 if not sub_filename_base:
4116 self.to_screen('[info] Skipping writing video subtitles')
4117 return ret
4118
4119 for sub_lang, sub_info in subtitles.items():
4120 sub_format = sub_info['ext']
4121 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
4122 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
4123 existing_sub = self.existing_file((sub_filename_final, sub_filename))
4124 if existing_sub:
4125 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
4126 sub_info['filepath'] = existing_sub
4127 ret.append((existing_sub, sub_filename_final))
4128 continue
4129
4130 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
4131 if sub_info.get('data') is not None:
4132 try:
4133 # Use newline='' to prevent conversion of newline characters
4134 # See https://github.com/ytdl-org/youtube-dl/issues/10268
4135 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
4136 subfile.write(sub_info['data'])
4137 sub_info['filepath'] = sub_filename
4138 ret.append((sub_filename, sub_filename_final))
4139 continue
4140 except OSError:
4141 self.report_error(f'Cannot write video subtitles file {sub_filename}')
4142 return None
4143
4144 try:
4145 sub_copy = sub_info.copy()
4146 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
4147 self.dl(sub_filename, sub_copy, subtitle=True)
4148 sub_info['filepath'] = sub_filename
4149 ret.append((sub_filename, sub_filename_final))
4150 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
4151 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
4152 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
4153 if not self.params.get('ignoreerrors'):
4154 self.report_error(msg)
4155 raise DownloadError(msg)
4156 self.report_warning(msg)
4157 return ret
4158
4159 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
4160 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
4161 write_all = self.params.get('write_all_thumbnails', False)
4162 thumbnails, ret = [], []
4163 if write_all or self.params.get('writethumbnail', False):
4164 thumbnails = info_dict.get('thumbnails') or []
4165 if not thumbnails:
4166 self.to_screen(f'[info] There are no {label} thumbnails to download')
4167 return ret
4168 multiple = write_all and len(thumbnails) > 1
4169
4170 if thumb_filename_base is None:
4171 thumb_filename_base = filename
4172 if thumbnails and not thumb_filename_base:
4173 self.write_debug(f'Skipping writing {label} thumbnail')
4174 return ret
4175
4176 for idx, t in list(enumerate(thumbnails))[::-1]:
4177 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
4178 thumb_display_id = f'{label} thumbnail {t["id"]}'
4179 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4180 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
4181
4182 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4183 if existing_thumb:
4184 self.to_screen('[info] %s is already present' % (
4185 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
4186 t['filepath'] = existing_thumb
4187 ret.append((existing_thumb, thumb_filename_final))
4188 else:
4189 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
4190 try:
4191 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
4192 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
4193 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
4194 shutil.copyfileobj(uf, thumbf)
4195 ret.append((thumb_filename, thumb_filename_final))
4196 t['filepath'] = thumb_filename
4197 except network_exceptions as err:
4198 if isinstance(err, urllib.error.HTTPError) and err.code == 404:
4199 self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
4200 else:
4201 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
4202 thumbnails.pop(idx)
4203 if ret and not write_all:
4204 break
4205 return ret