yt_dlp/YoutubeDL.py

   1 import collections
   2 import contextlib
   3 import datetime
   4 import errno
   5 import fileinput
   6 import functools
   7 import io
   8 import itertools
   9 import json
  10 import locale
  11 import operator
  12 import os
  13 import random
  14 import re
  15 import shutil
  16 import subprocess
  17 import sys
  18 import tempfile
  19 import time
  20 import tokenize
  21 import traceback
  22 import unicodedata
  23 import urllib.request
  24 from string import ascii_letters
  25
  26 from .cache import Cache
  27 from .compat import compat_os_name, compat_shlex_quote
  28 from .cookies import load_cookies
  29 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
  30 from .downloader.rtmp import rtmpdump_version
  31 from .extractor import gen_extractor_classes, get_info_extractor
  32 from .extractor.common import UnsupportedURLIE
  33 from .extractor.openload import PhantomJSwrapper
  34 from .minicurses import format_text
  35 from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
  36 from .postprocessor import (
  37     EmbedThumbnailPP,
  38     FFmpegFixupDuplicateMoovPP,
  39     FFmpegFixupDurationPP,
  40     FFmpegFixupM3u8PP,
  41     FFmpegFixupM4aPP,
  42     FFmpegFixupStretchedPP,
  43     FFmpegFixupTimestampPP,
  44     FFmpegMergerPP,
  45     FFmpegPostProcessor,
  46     FFmpegVideoConvertorPP,
  47     MoveFilesAfterDownloadPP,
  48     get_postprocessor,
  49 )
  50 from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
  51 from .update import REPOSITORY, current_git_head, detect_variant
  52 from .utils import (
  53     DEFAULT_OUTTMPL,
  54     IDENTITY,
  55     LINK_TEMPLATES,
  56     MEDIA_EXTENSIONS,
  57     NO_DEFAULT,
  58     NUMBER_RE,
  59     OUTTMPL_TYPES,
  60     POSTPROCESS_WHEN,
  61     STR_FORMAT_RE_TMPL,
  62     STR_FORMAT_TYPES,
  63     ContentTooShortError,
  64     DateRange,
  65     DownloadCancelled,
  66     DownloadError,
  67     EntryNotInPlaylist,
  68     ExistingVideoReached,
  69     ExtractorError,
  70     FormatSorter,
  71     GeoRestrictedError,
  72     HEADRequest,
  73     ISO3166Utils,
  74     LazyList,
  75     MaxDownloadsReached,
  76     Namespace,
  77     PagedList,
  78     PerRequestProxyHandler,
  79     PlaylistEntries,
  80     Popen,
  81     PostProcessingError,
  82     ReExtractInfo,
  83     RejectedVideoReached,
  84     SameFileError,
  85     UnavailableVideoError,
  86     UserNotLive,
  87     YoutubeDLCookieProcessor,
  88     YoutubeDLHandler,
  89     YoutubeDLRedirectHandler,
  90     age_restricted,
  91     args_to_str,
  92     bug_reports_message,
  93     date_from_str,
  94     deprecation_warning,
  95     determine_ext,
  96     determine_protocol,
  97     encode_compat_str,
  98     encodeFilename,
  99     error_to_compat_str,
 100     escapeHTML,
 101     expand_path,
 102     filter_dict,
 103     float_or_none,
 104     format_bytes,
 105     format_decimal_suffix,
 106     format_field,
 107     formatSeconds,
 108     get_compatible_ext,
 109     get_domain,
 110     int_or_none,
 111     iri_to_uri,
 112     is_path_like,
 113     join_nonempty,
 114     locked_file,
 115     make_archive_id,
 116     make_dir,
 117     make_HTTPS_handler,
 118     merge_headers,
 119     network_exceptions,
 120     number_of_digits,
 121     orderedSet,
 122     orderedSet_from_options,
 123     parse_filesize,
 124     preferredencoding,
 125     prepend_extension,
 126     register_socks_protocols,
 127     remove_terminal_sequences,
 128     render_table,
 129     replace_extension,
 130     sanitize_filename,
 131     sanitize_path,
 132     sanitize_url,
 133     sanitized_Request,
 134     std_headers,
 135     str_or_none,
 136     strftime_or_none,
 137     subtitles_filename,
 138     supports_terminal_sequences,
 139     system_identifier,
 140     timetuple_from_msec,
 141     to_high_limit_path,
 142     traverse_obj,
 143     try_call,
 144     try_get,
 145     url_basename,
 146     variadic,
 147     version_tuple,
 148     windows_enable_vt_mode,
 149     write_json_file,
 150     write_string,
 151 )
 152 from .version import RELEASE_GIT_HEAD, VARIANT, __version__
 153
 154 if compat_os_name == 'nt':
 155     import ctypes
 156
 157
 158 class YoutubeDL:
 159     """YoutubeDL class.
 160
 161     YoutubeDL objects are the ones responsible of downloading the
 162     actual video file and writing it to disk if the user has requested
 163     it, among some other tasks. In most cases there should be one per
 164     program. As, given a video URL, the downloader doesn't know how to
 165     extract all the needed information, task that InfoExtractors do, it
 166     has to pass the URL to one of them.
 167
 168     For this, YoutubeDL objects have a method that allows
 169     InfoExtractors to be registered in a given order. When it is passed
 170     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 171     finds that reports being able to handle it. The InfoExtractor extracts
 172     all the information about the video or videos the URL refers to, and
 173     YoutubeDL process the extracted information, possibly using a File
 174     Downloader to download the video.
 175
 176     YoutubeDL objects accept a lot of parameters. In order not to saturate
 177     the object constructor with arguments, it receives a dictionary of
 178     options instead. These options are available through the params
 179     attribute for the InfoExtractors to use. The YoutubeDL also
 180     registers itself as the downloader in charge for the InfoExtractors
 181     that are added to it, so this is a "mutual registration".
 182
 183     Available options:
 184
 185     username:          Username for authentication purposes.
 186     password:          Password for authentication purposes.
 187     videopassword:     Password for accessing a video.
 188     ap_mso:            Adobe Pass multiple-system operator identifier.
 189     ap_username:       Multiple-system operator account username.
 190     ap_password:       Multiple-system operator account password.
 191     usenetrc:          Use netrc for authentication instead.
 192     verbose:           Print additional info to stdout.
 193     quiet:             Do not print messages to stdout.
 194     no_warnings:       Do not print out anything for warnings.
 195     forceprint:        A dict with keys WHEN mapped to a list of templates to
 196                        print to stdout. The allowed keys are video or any of the
 197                        items in utils.POSTPROCESS_WHEN.
 198                        For compatibility, a single list is also accepted
 199     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 200                        a list of tuples with (template, filename)
 201     forcejson:         Force printing info_dict as JSON.
 202     dump_single_json:  Force printing the info_dict of the whole playlist
 203                        (or video) as a single JSON line.
 204     force_write_download_archive: Force writing download archive regardless
 205                        of 'skip_download' or 'simulate'.
 206     simulate:          Do not download the video files. If unset (or None),
 207                        simulate only if listsubtitles, listformats or list_thumbnails is used
 208     format:            Video format code. see "FORMAT SELECTION" for more details.
 209                        You can also pass a function. The function takes 'ctx' as
 210                        argument and returns the formats to download.
 211                        See "build_format_selector" for an implementation
 212     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 213     ignore_no_formats_error: Ignore "No video formats" error. Usefull for
 214                        extracting metadata even if the video is not actually
 215                        available for download (experimental)
 216     format_sort:       A list of fields by which to sort the video formats.
 217                        See "Sorting Formats" for more details.
 218     format_sort_force: Force the given format_sort. see "Sorting Formats"
 219                        for more details.
 220     prefer_free_formats: Whether to prefer video formats with free containers
 221                        over non-free ones of same quality.
 222     allow_multiple_video_streams:   Allow multiple video streams to be merged
 223                        into a single file
 224     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 225                        into a single file
 226     check_formats      Whether to test if the formats are downloadable.
 227                        Can be True (check all), False (check none),
 228                        'selected' (check selected formats),
 229                        or None (check only if requested by extractor)
 230     paths:             Dictionary of output paths. The allowed keys are 'home'
 231                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 232     outtmpl:           Dictionary of templates for output names. Allowed keys
 233                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 234                        For compatibility with youtube-dl, a single string can also be used
 235     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 236     restrictfilenames: Do not allow "&" and spaces in file names
 237     trim_file_name:    Limit length of filename (extension excluded)
 238     windowsfilenames:  Force the filenames to be windows compatible
 239     ignoreerrors:      Do not stop on download/postprocessing errors.
 240                        Can be 'only_download' to ignore only download errors.
 241                        Default is 'only_download' for CLI, but False for API
 242     skip_playlist_after_errors: Number of allowed failures until the rest of
 243                        the playlist is skipped
 244     allowed_extractors:  List of regexes to match against extractor names that are allowed
 245     overwrites:        Overwrite all video and metadata files if True,
 246                        overwrite only non-video files if None
 247                        and don't overwrite any file if False
 248                        For compatibility with youtube-dl,
 249                        "nooverwrites" may also be used instead
 250     playlist_items:    Specific indices of playlist to download.
 251     playlistrandom:    Download playlist items in random order.
 252     lazy_playlist:     Process playlist entries as they are received.
 253     matchtitle:        Download only matching titles.
 254     rejecttitle:       Reject downloads for matching titles.
 255     logger:            Log messages to a logging.Logger instance.
 256     logtostderr:       Print everything to stderr instead of stdout.
 257     consoletitle:      Display progress in console window's titlebar.
 258     writedescription:  Write the video description to a .description file
 259     writeinfojson:     Write the video description to a .info.json file
 260     clean_infojson:    Remove private fields from the infojson
 261     getcomments:       Extract video comments. This will not be written to disk
 262                        unless writeinfojson is also given
 263     writeannotations:  Write the video annotations to a .annotations.xml file
 264     writethumbnail:    Write the thumbnail image to a file
 265     allow_playlist_files: Whether to write playlists' description, infojson etc
 266                        also to disk when using the 'write*' options
 267     write_all_thumbnails:  Write all thumbnail formats to files
 268     writelink:         Write an internet shortcut file, depending on the
 269                        current platform (.url/.webloc/.desktop)
 270     writeurllink:      Write a Windows internet shortcut file (.url)
 271     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 272     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 273     writesubtitles:    Write the video subtitles to a file
 274     writeautomaticsub: Write the automatically generated subtitles to a file
 275     listsubtitles:     Lists all available subtitles for the video
 276     subtitlesformat:   The format code for subtitles
 277     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 278                        The list may contain "all" to refer to all the available
 279                        subtitles. The language can be prefixed with a "-" to
 280                        exclude it from the requested languages, e.g. ['all', '-live_chat']
 281     keepvideo:         Keep the video file after post-processing
 282     daterange:         A DateRange object, download only if the upload_date is in the range.
 283     skip_download:     Skip the actual download of the video file
 284     cachedir:          Location of the cache files in the filesystem.
 285                        False to disable filesystem cache.
 286     noplaylist:        Download single video instead of a playlist if in doubt.
 287     age_limit:         An integer representing the user's age in years.
 288                        Unsuitable videos for the given age are skipped.
 289     min_views:         An integer representing the minimum view count the video
 290                        must have in order to not be skipped.
 291                        Videos without view count information are always
 292                        downloaded. None for no limit.
 293     max_views:         An integer representing the maximum view count.
 294                        Videos that are more popular than that are not
 295                        downloaded.
 296                        Videos without view count information are always
 297                        downloaded. None for no limit.
 298     download_archive:  A set, or the name of a file where all downloads are recorded.
 299                        Videos already present in the file are not downloaded again.
 300     break_on_existing: Stop the download process after attempting to download a
 301                        file that is in the archive.
 302     break_on_reject:   Stop the download process when encountering a video that
 303                        has been filtered out.
 304     break_per_url:     Whether break_on_reject and break_on_existing
 305                        should act on each input URL as opposed to for the entire queue
 306     cookiefile:        File name or text stream from where cookies should be read and dumped to
 307     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 308                        name/path from where cookies are loaded, the name of the keyring,
 309                        and the container name, e.g. ('chrome', ) or
 310                        ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
 311     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 312                        support RFC 5746 secure renegotiation
 313     nocheckcertificate:  Do not verify SSL certificates
 314     client_certificate:  Path to client certificate file in PEM format. May include the private key
 315     client_certificate_key:  Path to private key file for client certificate
 316     client_certificate_password:  Password for client certificate private key, if encrypted.
 317                         If not provided and the key is encrypted, yt-dlp will ask interactively
 318     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 319                        (Only supported by some extractors)
 320     http_headers:      A dictionary of custom headers to be used for all requests
 321     proxy:             URL of the proxy server to use
 322     geo_verification_proxy:  URL of the proxy to use for IP address verification
 323                        on geo-restricted sites.
 324     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 325     bidi_workaround:   Work around buggy terminals without bidirectional text
 326                        support, using fridibi
 327     debug_printtraffic:Print out sent and received HTTP traffic
 328     default_search:    Prepend this string if an input url is not valid.
 329                        'auto' for elaborate guessing
 330     encoding:          Use this encoding instead of the system-specified.
 331     extract_flat:      Whether to resolve and process url_results further
 332                        * False:     Always process (default)
 333                        * True:      Never process
 334                        * 'in_playlist': Do not process inside playlist/multi_video
 335                        * 'discard': Always process, but don't return the result
 336                                     from inside playlist/multi_video
 337                        * 'discard_in_playlist': Same as "discard", but only for
 338                                     playlists (not multi_video)
 339     wait_for_video:    If given, wait for scheduled streams to become available.
 340                        The value should be a tuple containing the range
 341                        (min_secs, max_secs) to wait between retries
 342     postprocessors:    A list of dictionaries, each with an entry
 343                        * key:  The name of the postprocessor. See
 344                                yt_dlp/postprocessor/__init__.py for a list.
 345                        * when: When to run the postprocessor. Allowed values are
 346                                the entries of utils.POSTPROCESS_WHEN
 347                                Assumed to be 'post_process' if not given
 348     progress_hooks:    A list of functions that get called on download
 349                        progress, with a dictionary with the entries
 350                        * status: One of "downloading", "error", or "finished".
 351                                  Check this first and ignore unknown values.
 352                        * info_dict: The extracted info_dict
 353
 354                        If status is one of "downloading", or "finished", the
 355                        following properties may also be present:
 356                        * filename: The final filename (always present)
 357                        * tmpfilename: The filename we're currently writing to
 358                        * downloaded_bytes: Bytes on disk
 359                        * total_bytes: Size of the whole file, None if unknown
 360                        * total_bytes_estimate: Guess of the eventual file size,
 361                                                None if unavailable.
 362                        * elapsed: The number of seconds since download started.
 363                        * eta: The estimated time in seconds, None if unknown
 364                        * speed: The download speed in bytes/second, None if
 365                                 unknown
 366                        * fragment_index: The counter of the currently
 367                                          downloaded video fragment.
 368                        * fragment_count: The number of fragments (= individual
 369                                          files that will be merged)
 370
 371                        Progress hooks are guaranteed to be called at least once
 372                        (with status "finished") if the download is successful.
 373     postprocessor_hooks:  A list of functions that get called on postprocessing
 374                        progress, with a dictionary with the entries
 375                        * status: One of "started", "processing", or "finished".
 376                                  Check this first and ignore unknown values.
 377                        * postprocessor: Name of the postprocessor
 378                        * info_dict: The extracted info_dict
 379
 380                        Progress hooks are guaranteed to be called at least twice
 381                        (with status "started" and "finished") if the processing is successful.
 382     merge_output_format: "/" separated list of extensions to use when merging formats.
 383     final_ext:         Expected final extension; used to detect when the file was
 384                        already downloaded and converted
 385     fixup:             Automatically correct known faults of the file.
 386                        One of:
 387                        - "never": do nothing
 388                        - "warn": only emit a warning
 389                        - "detect_or_warn": check whether we can do anything
 390                                            about it, warn otherwise (default)
 391     source_address:    Client-side IP address to bind to.
 392     sleep_interval_requests: Number of seconds to sleep between requests
 393                        during extraction
 394     sleep_interval:    Number of seconds to sleep before each download when
 395                        used alone or a lower bound of a range for randomized
 396                        sleep before each download (minimum possible number
 397                        of seconds to sleep) when used along with
 398                        max_sleep_interval.
 399     max_sleep_interval:Upper bound of a range for randomized sleep before each
 400                        download (maximum possible number of seconds to sleep).
 401                        Must only be used along with sleep_interval.
 402                        Actual sleep time will be a random float from range
 403                        [sleep_interval; max_sleep_interval].
 404     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 405     listformats:       Print an overview of available video formats and exit.
 406     list_thumbnails:   Print a table of all thumbnails and exit.
 407     match_filter:      A function that gets called for every video with the signature
 408                        (info_dict, *, incomplete: bool) -> Optional[str]
 409                        For backward compatibility with youtube-dl, the signature
 410                        (info_dict) -> Optional[str] is also allowed.
 411                        - If it returns a message, the video is ignored.
 412                        - If it returns None, the video is downloaded.
 413                        - If it returns utils.NO_DEFAULT, the user is interactively
 414                          asked whether to download the video.
 415                        match_filter_func in utils.py is one example for this.
 416     no_color:          Do not emit color codes in output.
 417     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 418                        HTTP header
 419     geo_bypass_country:
 420                        Two-letter ISO 3166-2 country code that will be used for
 421                        explicit geographic restriction bypassing via faking
 422                        X-Forwarded-For HTTP header
 423     geo_bypass_ip_block:
 424                        IP range in CIDR notation that will be used similarly to
 425                        geo_bypass_country
 426     external_downloader: A dictionary of protocol keys and the executable of the
 427                        external downloader to use for it. The allowed protocols
 428                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 429                        Set the value to 'native' to use the native downloader
 430     compat_opts:       Compatibility options. See "Differences in default behavior".
 431                        The following options do not work when used through the API:
 432                        filename, abort-on-error, multistreams, no-live-chat, format-sort
 433                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 434                        Refer __init__.py for their implementation
 435     progress_template: Dictionary of templates for progress outputs.
 436                        Allowed keys are 'download', 'postprocess',
 437                        'download-title' (console title) and 'postprocess-title'.
 438                        The template is mapped on a dictionary with keys 'progress' and 'info'
 439     retry_sleep_functions: Dictionary of functions that takes the number of attempts
 440                        as argument and returns the time to sleep in seconds.
 441                        Allowed keys are 'http', 'fragment', 'file_access'
 442     download_ranges:   A callback function that gets called for every video with
 443                        the signature (info_dict, ydl) -> Iterable[Section].
 444                        Only the returned sections will be downloaded.
 445                        Each Section is a dict with the following keys:
 446                        * start_time: Start time of the section in seconds
 447                        * end_time: End time of the section in seconds
 448                        * title: Section title (Optional)
 449                        * index: Section number (Optional)
 450     force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
 451     noprogress:        Do not print the progress bar
 452     live_from_start:   Whether to download livestreams videos from the start
 453
 454     The following parameters are not used by YoutubeDL itself, they are used by
 455     the downloader (see yt_dlp/downloader/common.py):
 456     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 457     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 458     continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 459     external_downloader_args, concurrent_fragment_downloads.
 460
 461     The following options are used by the post processors:
 462     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 463                        to the binary or its containing directory.
 464     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 465                        and a list of additional command-line arguments for the
 466                        postprocessor/executable. The dict can also have "PP+EXE" keys
 467                        which are used when the given exe is used by the given PP.
 468                        Use 'default' as the name for arguments to passed to all PP
 469                        For compatibility with youtube-dl, a single list of args
 470                        can also be used
 471
 472     The following options are used by the extractors:
 473     extractor_retries: Number of times to retry for known errors
 474     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 475     hls_split_discontinuity: Split HLS playlists to different formats at
 476                        discontinuities such as ad breaks (default: False)
 477     extractor_args:    A dictionary of arguments to be passed to the extractors.
 478                        See "EXTRACTOR ARGUMENTS" for details.
 479                        E.g. {'youtube': {'skip': ['dash', 'hls']}}
 480     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 481
 482     The following options are deprecated and may be removed in the future:
 483
 484     force_generic_extractor: Force downloader to use the generic extractor
 485                        - Use allowed_extractors = ['generic', 'default']
 486     playliststart:     - Use playlist_items
 487                        Playlist item to start at.
 488     playlistend:       - Use playlist_items
 489                        Playlist item to end at.
 490     playlistreverse:   - Use playlist_items
 491                        Download playlist items in reverse order.
 492     forceurl:          - Use forceprint
 493                        Force printing final URL.
 494     forcetitle:        - Use forceprint
 495                        Force printing title.
 496     forceid:           - Use forceprint
 497                        Force printing ID.
 498     forcethumbnail:    - Use forceprint
 499                        Force printing thumbnail URL.
 500     forcedescription:  - Use forceprint
 501                        Force printing description.
 502     forcefilename:     - Use forceprint
 503                        Force printing final filename.
 504     forceduration:     - Use forceprint
 505                        Force printing duration.
 506     allsubtitles:      - Use subtitleslangs = ['all']
 507                        Downloads all the subtitles of the video
 508                        (requires writesubtitles or writeautomaticsub)
 509     include_ads:       - Doesn't work
 510                        Download ads as well
 511     call_home:         - Not implemented
 512                        Boolean, true iff we are allowed to contact the
 513                        yt-dlp servers for debugging.
 514     post_hooks:        - Register a custom postprocessor
 515                        A list of functions that get called as the final step
 516                        for each video file, after all postprocessors have been
 517                        called. The filename will be passed as the only argument.
 518     hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
 519                        Use the native HLS downloader instead of ffmpeg/avconv
 520                        if True, otherwise use ffmpeg/avconv if False, otherwise
 521                        use downloader suggested by extractor if None.
 522     prefer_ffmpeg:     - avconv support is deprecated
 523                        If False, use avconv instead of ffmpeg if both are available,
 524                        otherwise prefer ffmpeg.
 525     youtube_include_dash_manifest: - Use extractor_args
 526                        If True (default), DASH manifests and related
 527                        data will be downloaded and processed by extractor.
 528                        You can reduce network I/O by disabling it if you don't
 529                        care about DASH. (only for youtube)
 530     youtube_include_hls_manifest: - Use extractor_args
 531                        If True (default), HLS manifests and related
 532                        data will be downloaded and processed by extractor.
 533                        You can reduce network I/O by disabling it if you don't
 534                        care about HLS. (only for youtube)
 535     """
 536
 537     _NUMERIC_FIELDS = {
 538         'width', 'height', 'asr', 'audio_channels', 'fps',
 539         'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
 540         'timestamp', 'release_timestamp',
 541         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 542         'average_rating', 'comment_count', 'age_limit',
 543         'start_time', 'end_time',
 544         'chapter_number', 'season_number', 'episode_number',
 545         'track_number', 'disc_number', 'release_year',
 546     }
 547
 548     _format_fields = {
 549         # NB: Keep in sync with the docstring of extractor/common.py
 550         'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
 551         'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
 552         'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
 553         'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
 554         'preference', 'language', 'language_preference', 'quality', 'source_preference',
 555         'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
 556         'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
 557     }
 558     _format_selection_exts = {
 559         'audio': set(MEDIA_EXTENSIONS.common_audio),
 560         'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
 561         'storyboards': set(MEDIA_EXTENSIONS.storyboards),
 562     }
 563
 564     def __init__(self, params=None, auto_init=True):
 565         """Create a FileDownloader object with the given options.
 566         @param auto_init    Whether to load the default extractors and print header (if verbose).
 567                             Set to 'no_verbose_header' to not print the header
 568         """
 569         if params is None:
 570             params = {}
 571         self.params = params
 572         self._ies = {}
 573         self._ies_instances = {}
 574         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 575         self._printed_messages = set()
 576         self._first_webpage_request = True
 577         self._post_hooks = []
 578         self._progress_hooks = []
 579         self._postprocessor_hooks = []
 580         self._download_retcode = 0
 581         self._num_downloads = 0
 582         self._num_videos = 0
 583         self._playlist_level = 0
 584         self._playlist_urls = set()
 585         self.cache = Cache(self)
 586
 587         windows_enable_vt_mode()
 588         stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
 589         self._out_files = Namespace(
 590             out=stdout,
 591             error=sys.stderr,
 592             screen=sys.stderr if self.params.get('quiet') else stdout,
 593             console=None if compat_os_name == 'nt' else next(
 594                 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
 595         )
 596         self._allow_colors = Namespace(**{
 597             type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
 598             for type_, stream in self._out_files.items_ if type_ != 'console'
 599         })
 600
 601         # The code is left like this to be reused for future deprecations
 602         MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
 603         current_version = sys.version_info[:2]
 604         if current_version < MIN_RECOMMENDED:
 605             msg = ('Support for Python version %d.%d has been deprecated. '
 606                    'See  https://github.com/yt-dlp/yt-dlp/issues/3764  for more details.'
 607                    '\n                    You will no longer receive updates on this version')
 608             if current_version < MIN_SUPPORTED:
 609                 msg = 'Python version %d.%d is no longer supported'
 610             self.deprecation_warning(
 611                 f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))
 612
 613         if self.params.get('allow_unplayable_formats'):
 614             self.report_warning(
 615                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 616                 'This is a developer option intended for debugging. \n'
 617                 '         If you experience any issues while using this option, '
 618                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 619
 620         if self.params.get('bidi_workaround', False):
 621             try:
 622                 import pty
 623                 master, slave = pty.openpty()
 624                 width = shutil.get_terminal_size().columns
 625                 width_args = [] if width is None else ['-w', str(width)]
 626                 sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
 627                 try:
 628                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 629                 except OSError:
 630                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 631                 self._output_channel = os.fdopen(master, 'rb')
 632             except OSError as ose:
 633                 if ose.errno == errno.ENOENT:
 634                     self.report_warning(
 635                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 636                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 637                 else:
 638                     raise
 639
 640         self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
 641         if auto_init and auto_init != 'no_verbose_header':
 642             self.print_debug_header()
 643
 644         def check_deprecated(param, option, suggestion):
 645             if self.params.get(param) is not None:
 646                 self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
 647                 return True
 648             return False
 649
 650         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 651             if self.params.get('geo_verification_proxy') is None:
 652                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 653
 654         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 655         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 656         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 657
 658         for msg in self.params.get('_warnings', []):
 659             self.report_warning(msg)
 660         for msg in self.params.get('_deprecation_warnings', []):
 661             self.deprecated_feature(msg)
 662
 663         if 'list-formats' in self.params['compat_opts']:
 664             self.params['listformats_table'] = False
 665
 666         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 667             # nooverwrites was unnecessarily changed to overwrites
 668             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 669             # This ensures compatibility with both keys
 670             self.params['overwrites'] = not self.params['nooverwrites']
 671         elif self.params.get('overwrites') is None:
 672             self.params.pop('overwrites', None)
 673         else:
 674             self.params['nooverwrites'] = not self.params['overwrites']
 675
 676         if self.params.get('simulate') is None and any((
 677             self.params.get('list_thumbnails'),
 678             self.params.get('listformats'),
 679             self.params.get('listsubtitles'),
 680         )):
 681             self.params['simulate'] = 'list_only'
 682
 683         self.params.setdefault('forceprint', {})
 684         self.params.setdefault('print_to_file', {})
 685
 686         # Compatibility with older syntax
 687         if not isinstance(params['forceprint'], dict):
 688             self.params['forceprint'] = {'video': params['forceprint']}
 689
 690         if auto_init:
 691             self.add_default_info_extractors()
 692
 693         if (sys.platform != 'win32'
 694                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 695                 and not self.params.get('restrictfilenames', False)):
 696             # Unicode filesystem API will throw errors (#1474, #13027)
 697             self.report_warning(
 698                 'Assuming --restrict-filenames since file system encoding '
 699                 'cannot encode all characters. '
 700                 'Set the LC_ALL environment variable to fix this.')
 701             self.params['restrictfilenames'] = True
 702
 703         self._parse_outtmpl()
 704
 705         # Creating format selector here allows us to catch syntax errors before the extraction
 706         self.format_selector = (
 707             self.params.get('format') if self.params.get('format') in (None, '-')
 708             else self.params['format'] if callable(self.params['format'])
 709             else self.build_format_selector(self.params['format']))
 710
 711         # Set http_headers defaults according to std_headers
 712         self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
 713
 714         hooks = {
 715             'post_hooks': self.add_post_hook,
 716             'progress_hooks': self.add_progress_hook,
 717             'postprocessor_hooks': self.add_postprocessor_hook,
 718         }
 719         for opt, fn in hooks.items():
 720             for ph in self.params.get(opt, []):
 721                 fn(ph)
 722
 723         for pp_def_raw in self.params.get('postprocessors', []):
 724             pp_def = dict(pp_def_raw)
 725             when = pp_def.pop('when', 'post_process')
 726             self.add_post_processor(
 727                 get_postprocessor(pp_def.pop('key'))(self, **pp_def),
 728                 when=when)
 729
 730         self._setup_opener()
 731         register_socks_protocols()
 732
 733         def preload_download_archive(fn):
 734             """Preload the archive, if any is specified"""
 735             archive = set()
 736             if fn is None:
 737                 return archive
 738             elif not is_path_like(fn):
 739                 return fn
 740
 741             self.write_debug(f'Loading archive file {fn!r}')
 742             try:
 743                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 744                     for line in archive_file:
 745                         archive.add(line.strip())
 746             except OSError as ioe:
 747                 if ioe.errno != errno.ENOENT:
 748                     raise
 749             return archive
 750
 751         self.archive = preload_download_archive(self.params.get('download_archive'))
 752
 753     def warn_if_short_id(self, argv):
 754         # short YouTube ID starting with dash?
 755         idxs = [
 756             i for i, a in enumerate(argv)
 757             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 758         if idxs:
 759             correct_argv = (
 760                 ['yt-dlp']
 761                 + [a for i, a in enumerate(argv) if i not in idxs]
 762                 + ['--'] + [argv[i] for i in idxs]
 763             )
 764             self.report_warning(
 765                 'Long argument string detected. '
 766                 'Use -- to separate parameters and URLs, like this:\n%s' %
 767                 args_to_str(correct_argv))
 768
 769     def add_info_extractor(self, ie):
 770         """Add an InfoExtractor object to the end of the list."""
 771         ie_key = ie.ie_key()
 772         self._ies[ie_key] = ie
 773         if not isinstance(ie, type):
 774             self._ies_instances[ie_key] = ie
 775             ie.set_downloader(self)
 776
 777     def get_info_extractor(self, ie_key):
 778         """
 779         Get an instance of an IE with name ie_key, it will try to get one from
 780         the _ies list, if there's no instance it will create a new one and add
 781         it to the extractor list.
 782         """
 783         ie = self._ies_instances.get(ie_key)
 784         if ie is None:
 785             ie = get_info_extractor(ie_key)()
 786             self.add_info_extractor(ie)
 787         return ie
 788
 789     def add_default_info_extractors(self):
 790         """
 791         Add the InfoExtractors returned by gen_extractors to the end of the list
 792         """
 793         all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
 794         all_ies['end'] = UnsupportedURLIE()
 795         try:
 796             ie_names = orderedSet_from_options(
 797                 self.params.get('allowed_extractors', ['default']), {
 798                     'all': list(all_ies),
 799                     'default': [name for name, ie in all_ies.items() if ie._ENABLED],
 800                 }, use_regex=True)
 801         except re.error as e:
 802             raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
 803         for name in ie_names:
 804             self.add_info_extractor(all_ies[name])
 805         self.write_debug(f'Loaded {len(ie_names)} extractors')
 806
 807     def add_post_processor(self, pp, when='post_process'):
 808         """Add a PostProcessor object to the end of the chain."""
 809         assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
 810         self._pps[when].append(pp)
 811         pp.set_downloader(self)
 812
 813     def add_post_hook(self, ph):
 814         """Add the post hook"""
 815         self._post_hooks.append(ph)
 816
 817     def add_progress_hook(self, ph):
 818         """Add the download progress hook"""
 819         self._progress_hooks.append(ph)
 820
 821     def add_postprocessor_hook(self, ph):
 822         """Add the postprocessing progress hook"""
 823         self._postprocessor_hooks.append(ph)
 824         for pps in self._pps.values():
 825             for pp in pps:
 826                 pp.add_progress_hook(ph)
 827
 828     def _bidi_workaround(self, message):
 829         if not hasattr(self, '_output_channel'):
 830             return message
 831
 832         assert hasattr(self, '_output_process')
 833         assert isinstance(message, str)
 834         line_count = message.count('\n') + 1
 835         self._output_process.stdin.write((message + '\n').encode())
 836         self._output_process.stdin.flush()
 837         res = ''.join(self._output_channel.readline().decode()
 838                       for _ in range(line_count))
 839         return res[:-len('\n')]
 840
 841     def _write_string(self, message, out=None, only_once=False):
 842         if only_once:
 843             if message in self._printed_messages:
 844                 return
 845             self._printed_messages.add(message)
 846         write_string(message, out=out, encoding=self.params.get('encoding'))
 847
 848     def to_stdout(self, message, skip_eol=False, quiet=None):
 849         """Print message to stdout"""
 850         if quiet is not None:
 851             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
 852                                      'Use "YoutubeDL.to_screen" instead')
 853         if skip_eol is not False:
 854             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
 855                                      'Use "YoutubeDL.to_screen" instead')
 856         self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
 857
 858     def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
 859         """Print message to screen if not in quiet mode"""
 860         if self.params.get('logger'):
 861             self.params['logger'].debug(message)
 862             return
 863         if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
 864             return
 865         self._write_string(
 866             '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 867             self._out_files.screen, only_once=only_once)
 868
 869     def to_stderr(self, message, only_once=False):
 870         """Print message to stderr"""
 871         assert isinstance(message, str)
 872         if self.params.get('logger'):
 873             self.params['logger'].error(message)
 874         else:
 875             self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
 876
 877     def _send_console_code(self, code):
 878         if compat_os_name == 'nt' or not self._out_files.console:
 879             return
 880         self._write_string(code, self._out_files.console)
 881
 882     def to_console_title(self, message):
 883         if not self.params.get('consoletitle', False):
 884             return
 885         message = remove_terminal_sequences(message)
 886         if compat_os_name == 'nt':
 887             if ctypes.windll.kernel32.GetConsoleWindow():
 888                 # c_wchar_p() might not be necessary if `message` is
 889                 # already of type unicode()
 890                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 891         else:
 892             self._send_console_code(f'\033]0;{message}\007')
 893
 894     def save_console_title(self):
 895         if not self.params.get('consoletitle') or self.params.get('simulate'):
 896             return
 897         self._send_console_code('\033[22;0t')  # Save the title on stack
 898
 899     def restore_console_title(self):
 900         if not self.params.get('consoletitle') or self.params.get('simulate'):
 901             return
 902         self._send_console_code('\033[23;0t')  # Restore the title from stack
 903
 904     def __enter__(self):
 905         self.save_console_title()
 906         return self
 907
 908     def __exit__(self, *args):
 909         self.restore_console_title()
 910
 911         if self.params.get('cookiefile') is not None:
 912             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 913
 914     def trouble(self, message=None, tb=None, is_error=True):
 915         """Determine action to take when a download problem appears.
 916
 917         Depending on if the downloader has been configured to ignore
 918         download errors or not, this method may throw an exception or
 919         not when errors are found, after printing the message.
 920
 921         @param tb          If given, is additional traceback information
 922         @param is_error    Whether to raise error according to ignorerrors
 923         """
 924         if message is not None:
 925             self.to_stderr(message)
 926         if self.params.get('verbose'):
 927             if tb is None:
 928                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 929                     tb = ''
 930                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 931                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 932                     tb += encode_compat_str(traceback.format_exc())
 933                 else:
 934                     tb_data = traceback.format_list(traceback.extract_stack())
 935                     tb = ''.join(tb_data)
 936             if tb:
 937                 self.to_stderr(tb)
 938         if not is_error:
 939             return
 940         if not self.params.get('ignoreerrors'):
 941             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 942                 exc_info = sys.exc_info()[1].exc_info
 943             else:
 944                 exc_info = sys.exc_info()
 945             raise DownloadError(message, exc_info)
 946         self._download_retcode = 1
 947
 948     Styles = Namespace(
 949         HEADERS='yellow',
 950         EMPHASIS='light blue',
 951         FILENAME='green',
 952         ID='green',
 953         DELIM='blue',
 954         ERROR='red',
 955         WARNING='yellow',
 956         SUPPRESS='light black',
 957     )
 958
 959     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 960         text = str(text)
 961         if test_encoding:
 962             original_text = text
 963             # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
 964             encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
 965             text = text.encode(encoding, 'ignore').decode(encoding)
 966             if fallback is not None and text != original_text:
 967                 text = fallback
 968         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 969
 970     def _format_out(self, *args, **kwargs):
 971         return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
 972
 973     def _format_screen(self, *args, **kwargs):
 974         return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
 975
 976     def _format_err(self, *args, **kwargs):
 977         return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
 978
 979     def report_warning(self, message, only_once=False):
 980         '''
 981         Print the message to stderr, it will be prefixed with 'WARNING:'
 982         If stderr is a tty file the 'WARNING:' will be colored
 983         '''
 984         if self.params.get('logger') is not None:
 985             self.params['logger'].warning(message)
 986         else:
 987             if self.params.get('no_warnings'):
 988                 return
 989             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 990
 991     def deprecation_warning(self, message, *, stacklevel=0):
 992         deprecation_warning(
 993             message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)
 994
 995     def deprecated_feature(self, message):
 996         if self.params.get('logger') is not None:
 997             self.params['logger'].warning(f'Deprecated Feature: {message}')
 998         self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
 999
1000     def report_error(self, message, *args, **kwargs):
1001         '''
1002         Do the same as trouble, but prefixes the message with 'ERROR:', colored
1003         in red if stderr is a tty file.
1004         '''
1005         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
1006
1007     def write_debug(self, message, only_once=False):
1008         '''Log debug message or Print message to stderr'''
1009         if not self.params.get('verbose', False):
1010             return
1011         message = f'[debug] {message}'
1012         if self.params.get('logger'):
1013             self.params['logger'].debug(message)
1014         else:
1015             self.to_stderr(message, only_once)
1016
1017     def report_file_already_downloaded(self, file_name):
1018         """Report file has already been fully downloaded."""
1019         try:
1020             self.to_screen('[download] %s has already been downloaded' % file_name)
1021         except UnicodeEncodeError:
1022             self.to_screen('[download] The file has already been downloaded')
1023
1024     def report_file_delete(self, file_name):
1025         """Report that existing file will be deleted."""
1026         try:
1027             self.to_screen('Deleting existing file %s' % file_name)
1028         except UnicodeEncodeError:
1029             self.to_screen('Deleting existing file')
1030
1031     def raise_no_formats(self, info, forced=False, *, msg=None):
1032         has_drm = info.get('_has_drm')
1033         ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
1034         msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
1035         if forced or not ignored:
1036             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
1037                                  expected=has_drm or ignored or expected)
1038         else:
1039             self.report_warning(msg)
1040
1041     def parse_outtmpl(self):
1042         self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
1043         self._parse_outtmpl()
1044         return self.params['outtmpl']
1045
1046     def _parse_outtmpl(self):
1047         sanitize = IDENTITY
1048         if self.params.get('restrictfilenames'):  # Remove spaces in the default template
1049             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
1050
1051         outtmpl = self.params.setdefault('outtmpl', {})
1052         if not isinstance(outtmpl, dict):
1053             self.params['outtmpl'] = outtmpl = {'default': outtmpl}
1054         outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})
1055
1056     def get_output_path(self, dir_type='', filename=None):
1057         paths = self.params.get('paths', {})
1058         assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
1059         path = os.path.join(
1060             expand_path(paths.get('home', '').strip()),
1061             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1062             filename or '')
1063         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1064
1065     @staticmethod
1066     def _outtmpl_expandpath(outtmpl):
1067         # expand_path translates '%%' into '%' and '$$' into '$'
1068         # correspondingly that is not what we want since we need to keep
1069         # '%%' intact for template dict substitution step. Working around
1070         # with boundary-alike separator hack.
1071         sep = ''.join(random.choices(ascii_letters, k=32))
1072         outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
1073
1074         # outtmpl should be expand_path'ed before template dict substitution
1075         # because meta fields may contain env variables we don't want to
1076         # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
1077         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1078         return expand_path(outtmpl).replace(sep, '')
1079
1080     @staticmethod
1081     def escape_outtmpl(outtmpl):
1082         ''' Escape any remaining strings like %s, %abc% etc. '''
1083         return re.sub(
1084             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1085             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1086             outtmpl)
1087
1088     @classmethod
1089     def validate_outtmpl(cls, outtmpl):
1090         ''' @return None or Exception object '''
1091         outtmpl = re.sub(
1092             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
1093             lambda mobj: f'{mobj.group(0)[:-1]}s',
1094             cls._outtmpl_expandpath(outtmpl))
1095         try:
1096             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1097             return None
1098         except ValueError as err:
1099             return err
1100
1101     @staticmethod
1102     def _copy_infodict(info_dict):
1103         info_dict = dict(info_dict)
1104         info_dict.pop('__postprocessors', None)
1105         info_dict.pop('__pending_error', None)
1106         return info_dict
1107
1108     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1109         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1110         @param sanitize    Whether to sanitize the output as a filename.
1111                            For backward compatibility, a function can also be passed
1112         """
1113
1114         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1115
1116         info_dict = self._copy_infodict(info_dict)
1117         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1118             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1119             if info_dict.get('duration', None) is not None
1120             else None)
1121         info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
1122         info_dict['video_autonumber'] = self._num_videos
1123         if info_dict.get('resolution') is None:
1124             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1125
1126         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1127         # of %(field)s to %(field)0Nd for backward compatibility
1128         field_size_compat_map = {
1129             'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
1130             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1131             'autonumber': self.params.get('autonumber_size') or 5,
1132         }
1133
1134         TMPL_DICT = {}
1135         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
1136         MATH_FUNCTIONS = {
1137             '+': float.__add__,
1138             '-': float.__sub__,
1139         }
1140         # Field is of the form key1.key2...
1141         # where keys (except first) can be string, int, slice or "{field, ...}"
1142         FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
1143         FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
1144             'inner': FIELD_INNER_RE,
1145             'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
1146         }
1147         MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
1148         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1149         INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
1150             (?P<negate>-)?
1151             (?P<fields>{FIELD_RE})
1152             (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
1153             (?:>(?P<strf_format>.+?))?
1154             (?P<remaining>
1155                 (?P<alternate>(?<!\\),[^|&)]+)?
1156                 (?:&(?P<replacement>.*?))?
1157                 (?:\|(?P<default>.*?))?
1158             )$''')
1159
1160         def _traverse_infodict(fields):
1161             fields = [f for x in re.split(r'\.({.+?})\.?', fields)
1162                       for f in ([x] if x.startswith('{') else x.split('.'))]
1163             for i in (0, -1):
1164                 if fields and not fields[i]:
1165                     fields.pop(i)
1166
1167             for i, f in enumerate(fields):
1168                 if not f.startswith('{'):
1169                     continue
1170                 assert f.endswith('}'), f'No closing brace for {f} in {fields}'
1171                 fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
1172
1173             return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
1174
1175         def get_value(mdict):
1176             # Object traversal
1177             value = _traverse_infodict(mdict['fields'])
1178             # Negative
1179             if mdict['negate']:
1180                 value = float_or_none(value)
1181                 if value is not None:
1182                     value *= -1
1183             # Do maths
1184             offset_key = mdict['maths']
1185             if offset_key:
1186                 value = float_or_none(value)
1187                 operator = None
1188                 while offset_key:
1189                     item = re.match(
1190                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1191                         offset_key).group(0)
1192                     offset_key = offset_key[len(item):]
1193                     if operator is None:
1194                         operator = MATH_FUNCTIONS[item]
1195                         continue
1196                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1197                     offset = float_or_none(item)
1198                     if offset is None:
1199                         offset = float_or_none(_traverse_infodict(item))
1200                     try:
1201                         value = operator(value, multiplier * offset)
1202                     except (TypeError, ZeroDivisionError):
1203                         return None
1204                     operator = None
1205             # Datetime formatting
1206             if mdict['strf_format']:
1207                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1208
1209             # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
1210             if sanitize and value == '':
1211                 value = None
1212             return value
1213
1214         na = self.params.get('outtmpl_na_placeholder', 'NA')
1215
1216         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1217             return sanitize_filename(str(value), restricted=restricted, is_id=(
1218                 bool(re.search(r'(^|[_.])id(\.|$)', key))
1219                 if 'filename-sanitization' in self.params['compat_opts']
1220                 else NO_DEFAULT))
1221
1222         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1223         sanitize = bool(sanitize)
1224
1225         def _dumpjson_default(obj):
1226             if isinstance(obj, (set, LazyList)):
1227                 return list(obj)
1228             return repr(obj)
1229
1230         def create_key(outer_mobj):
1231             if not outer_mobj.group('has_key'):
1232                 return outer_mobj.group(0)
1233             key = outer_mobj.group('key')
1234             mobj = re.match(INTERNAL_FORMAT_RE, key)
1235             initial_field = mobj.group('fields') if mobj else ''
1236             value, replacement, default = None, None, na
1237             while mobj:
1238                 mobj = mobj.groupdict()
1239                 default = mobj['default'] if mobj['default'] is not None else default
1240                 value = get_value(mobj)
1241                 replacement = mobj['replacement']
1242                 if value is None and mobj['alternate']:
1243                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
1244                 else:
1245                     break
1246
1247             fmt = outer_mobj.group('format')
1248             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1249                 fmt = f'0{field_size_compat_map[key]:d}d'
1250
1251             value = default if value is None else value if replacement is None else replacement
1252
1253             flags = outer_mobj.group('conversion') or ''
1254             str_fmt = f'{fmt[:-1]}s'
1255             if fmt[-1] == 'l':  # list
1256                 delim = '\n' if '#' in flags else ', '
1257                 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1258             elif fmt[-1] == 'j':  # json
1259                 value, fmt = json.dumps(
1260                     value, default=_dumpjson_default,
1261                     indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
1262             elif fmt[-1] == 'h':  # html
1263                 value, fmt = escapeHTML(str(value)), str_fmt
1264             elif fmt[-1] == 'q':  # quoted
1265                 value = map(str, variadic(value) if '#' in flags else [value])
1266                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1267             elif fmt[-1] == 'B':  # bytes
1268                 value = f'%{str_fmt}'.encode() % str(value).encode()
1269                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1270             elif fmt[-1] == 'U':  # unicode normalized
1271                 value, fmt = unicodedata.normalize(
1272                     # "+" = compatibility equivalence, "#" = NFD
1273                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1274                     value), str_fmt
1275             elif fmt[-1] == 'D':  # decimal suffix
1276                 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1277                 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1278                                               factor=1024 if '#' in flags else 1000)
1279             elif fmt[-1] == 'S':  # filename sanitization
1280                 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
1281             elif fmt[-1] == 'c':
1282                 if value:
1283                     value = str(value)[0]
1284                 else:
1285                     fmt = str_fmt
1286             elif fmt[-1] not in 'rs':  # numeric
1287                 value = float_or_none(value)
1288                 if value is None:
1289                     value, fmt = default, 's'
1290
1291             if sanitize:
1292                 if fmt[-1] == 'r':
1293                     # If value is an object, sanitize might convert it to a string
1294                     # So we convert it to repr first
1295                     value, fmt = repr(value), str_fmt
1296                 if fmt[-1] in 'csr':
1297                     value = sanitizer(initial_field, value)
1298
1299             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1300             TMPL_DICT[key] = value
1301             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1302
1303         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1304
1305     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1306         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1307         return self.escape_outtmpl(outtmpl) % info_dict
1308
1309     def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1310         assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1311         if outtmpl is None:
1312             outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1313         try:
1314             outtmpl = self._outtmpl_expandpath(outtmpl)
1315             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1316             if not filename:
1317                 return None
1318
1319             if tmpl_type in ('', 'temp'):
1320                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1321                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1322                     filename = replace_extension(filename, ext, final_ext)
1323             elif tmpl_type:
1324                 force_ext = OUTTMPL_TYPES[tmpl_type]
1325                 if force_ext:
1326                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1327
1328             # https://github.com/blackjack4494/youtube-dlc/issues/85
1329             trim_file_name = self.params.get('trim_file_name', False)
1330             if trim_file_name:
1331                 no_ext, *ext = filename.rsplit('.', 2)
1332                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1333
1334             return filename
1335         except ValueError as err:
1336             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1337             return None
1338
1339     def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1340         """Generate the output filename"""
1341         if outtmpl:
1342             assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1343             dir_type = None
1344         filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1345         if not filename and dir_type not in ('', 'temp'):
1346             return ''
1347
1348         if warn:
1349             if not self.params.get('paths'):
1350                 pass
1351             elif filename == '-':
1352                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1353             elif os.path.isabs(filename):
1354                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1355         if filename == '-' or not filename:
1356             return filename
1357
1358         return self.get_output_path(dir_type, filename)
1359
1360     def _match_entry(self, info_dict, incomplete=False, silent=False):
1361         """Returns None if the file should be downloaded"""
1362         _type = info_dict.get('_type', 'video')
1363         assert incomplete or _type == 'video', 'Only video result can be considered complete'
1364
1365         video_title = info_dict.get('title', info_dict.get('id', 'entry'))
1366
1367         def check_filter():
1368             if _type in ('playlist', 'multi_video'):
1369                 return
1370             elif _type in ('url', 'url_transparent') and not try_call(
1371                     lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1372                 return
1373
1374             if 'title' in info_dict:
1375                 # This can happen when we're just evaluating the playlist
1376                 title = info_dict['title']
1377                 matchtitle = self.params.get('matchtitle', False)
1378                 if matchtitle:
1379                     if not re.search(matchtitle, title, re.IGNORECASE):
1380                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1381                 rejecttitle = self.params.get('rejecttitle', False)
1382                 if rejecttitle:
1383                     if re.search(rejecttitle, title, re.IGNORECASE):
1384                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1385
1386             date = info_dict.get('upload_date')
1387             if date is not None:
1388                 dateRange = self.params.get('daterange', DateRange())
1389                 if date not in dateRange:
1390                     return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1391             view_count = info_dict.get('view_count')
1392             if view_count is not None:
1393                 min_views = self.params.get('min_views')
1394                 if min_views is not None and view_count < min_views:
1395                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1396                 max_views = self.params.get('max_views')
1397                 if max_views is not None and view_count > max_views:
1398                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1399             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1400                 return 'Skipping "%s" because it is age restricted' % video_title
1401
1402             match_filter = self.params.get('match_filter')
1403             if match_filter is not None:
1404                 try:
1405                     ret = match_filter(info_dict, incomplete=incomplete)
1406                 except TypeError:
1407                     # For backward compatibility
1408                     ret = None if incomplete else match_filter(info_dict)
1409                 if ret is NO_DEFAULT:
1410                     while True:
1411                         filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1412                         reply = input(self._format_screen(
1413                             f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1414                         if reply in {'y', ''}:
1415                             return None
1416                         elif reply == 'n':
1417                             return f'Skipping {video_title}'
1418                 elif ret is not None:
1419                     return ret
1420             return None
1421
1422         if self.in_download_archive(info_dict):
1423             reason = '%s has already been recorded in the archive' % video_title
1424             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1425         else:
1426             reason = check_filter()
1427             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1428         if reason is not None:
1429             if not silent:
1430                 self.to_screen('[download] ' + reason)
1431             if self.params.get(break_opt, False):
1432                 raise break_err()
1433         return reason
1434
1435     @staticmethod
1436     def add_extra_info(info_dict, extra_info):
1437         '''Set the keys from extra_info in info dict if they are missing'''
1438         for key, value in extra_info.items():
1439             info_dict.setdefault(key, value)
1440
1441     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1442                      process=True, force_generic_extractor=False):
1443         """
1444         Extract and return the information dictionary of the URL
1445
1446         Arguments:
1447         @param url          URL to extract
1448
1449         Keyword arguments:
1450         @param download     Whether to download videos
1451         @param process      Whether to resolve all unresolved references (URLs, playlist items).
1452                             Must be True for download to work
1453         @param ie_key       Use only the extractor with this key
1454
1455         @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
1456         @force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')
1457         """
1458
1459         if extra_info is None:
1460             extra_info = {}
1461
1462         if not ie_key and force_generic_extractor:
1463             ie_key = 'Generic'
1464
1465         if ie_key:
1466             ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1467         else:
1468             ies = self._ies
1469
1470         for key, ie in ies.items():
1471             if not ie.suitable(url):
1472                 continue
1473
1474             if not ie.working():
1475                 self.report_warning('The program functionality for this site has been marked as broken, '
1476                                     'and will probably not work.')
1477
1478             temp_id = ie.get_temp_id(url)
1479             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1480                 self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
1481                 if self.params.get('break_on_existing', False):
1482                     raise ExistingVideoReached()
1483                 break
1484             return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1485         else:
1486             extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1487             self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1488                               tb=False if extractors_restricted else None)
1489
1490     def _handle_extraction_exceptions(func):
1491         @functools.wraps(func)
1492         def wrapper(self, *args, **kwargs):
1493             while True:
1494                 try:
1495                     return func(self, *args, **kwargs)
1496                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1497                     raise
1498                 except ReExtractInfo as e:
1499                     if e.expected:
1500                         self.to_screen(f'{e}; Re-extracting data')
1501                     else:
1502                         self.to_stderr('\r')
1503                         self.report_warning(f'{e}; Re-extracting data')
1504                     continue
1505                 except GeoRestrictedError as e:
1506                     msg = e.msg
1507                     if e.countries:
1508                         msg += '\nThis video is available in %s.' % ', '.join(
1509                             map(ISO3166Utils.short2full, e.countries))
1510                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1511                     self.report_error(msg)
1512                 except ExtractorError as e:  # An error we somewhat expected
1513                     self.report_error(str(e), e.format_traceback())
1514                 except Exception as e:
1515                     if self.params.get('ignoreerrors'):
1516                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1517                     else:
1518                         raise
1519                 break
1520         return wrapper
1521
1522     def _wait_for_video(self, ie_result={}):
1523         if (not self.params.get('wait_for_video')
1524                 or ie_result.get('_type', 'video') != 'video'
1525                 or ie_result.get('formats') or ie_result.get('url')):
1526             return
1527
1528         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1529         last_msg = ''
1530
1531         def progress(msg):
1532             nonlocal last_msg
1533             full_msg = f'{msg}\n'
1534             if not self.params.get('noprogress'):
1535                 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1536             elif last_msg:
1537                 return
1538             self.to_screen(full_msg, skip_eol=True)
1539             last_msg = msg
1540
1541         min_wait, max_wait = self.params.get('wait_for_video')
1542         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1543         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1544             diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1545             self.report_warning('Release time of video is not known')
1546         elif ie_result and (diff or 0) <= 0:
1547             self.report_warning('Video should already be available according to extracted info')
1548         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1549         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1550
1551         wait_till = time.time() + diff
1552         try:
1553             while True:
1554                 diff = wait_till - time.time()
1555                 if diff <= 0:
1556                     progress('')
1557                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1558                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1559                 time.sleep(1)
1560         except KeyboardInterrupt:
1561             progress('')
1562             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1563         except BaseException as e:
1564             if not isinstance(e, ReExtractInfo):
1565                 self.to_screen('')
1566             raise
1567
1568     @_handle_extraction_exceptions
1569     def __extract_info(self, url, ie, download, extra_info, process):
1570         try:
1571             ie_result = ie.extract(url)
1572         except UserNotLive as e:
1573             if process:
1574                 if self.params.get('wait_for_video'):
1575                     self.report_warning(e)
1576                 self._wait_for_video()
1577             raise
1578         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1579             self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1580             return
1581         if isinstance(ie_result, list):
1582             # Backwards compatibility: old IE result format
1583             ie_result = {
1584                 '_type': 'compat_list',
1585                 'entries': ie_result,
1586             }
1587         if extra_info.get('original_url'):
1588             ie_result.setdefault('original_url', extra_info['original_url'])
1589         self.add_default_extra_info(ie_result, ie, url)
1590         if process:
1591             self._wait_for_video(ie_result)
1592             return self.process_ie_result(ie_result, download, extra_info)
1593         else:
1594             return ie_result
1595
1596     def add_default_extra_info(self, ie_result, ie, url):
1597         if url is not None:
1598             self.add_extra_info(ie_result, {
1599                 'webpage_url': url,
1600                 'original_url': url,
1601             })
1602         webpage_url = ie_result.get('webpage_url')
1603         if webpage_url:
1604             self.add_extra_info(ie_result, {
1605                 'webpage_url_basename': url_basename(webpage_url),
1606                 'webpage_url_domain': get_domain(webpage_url),
1607             })
1608         if ie is not None:
1609             self.add_extra_info(ie_result, {
1610                 'extractor': ie.IE_NAME,
1611                 'extractor_key': ie.ie_key(),
1612             })
1613
1614     def process_ie_result(self, ie_result, download=True, extra_info=None):
1615         """
1616         Take the result of the ie(may be modified) and resolve all unresolved
1617         references (URLs, playlist items).
1618
1619         It will also download the videos if 'download'.
1620         Returns the resolved ie_result.
1621         """
1622         if extra_info is None:
1623             extra_info = {}
1624         result_type = ie_result.get('_type', 'video')
1625
1626         if result_type in ('url', 'url_transparent'):
1627             ie_result['url'] = sanitize_url(
1628                 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1629             if ie_result.get('original_url') and not extra_info.get('original_url'):
1630                 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1631
1632             extract_flat = self.params.get('extract_flat', False)
1633             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1634                     or extract_flat is True):
1635                 info_copy = ie_result.copy()
1636                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1637                 if ie and not ie_result.get('id'):
1638                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1639                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1640                 self.add_extra_info(info_copy, extra_info)
1641                 info_copy, _ = self.pre_process(info_copy)
1642                 self._fill_common_fields(info_copy, False)
1643                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1644                 self._raise_pending_errors(info_copy)
1645                 if self.params.get('force_write_download_archive', False):
1646                     self.record_download_archive(info_copy)
1647                 return ie_result
1648
1649         if result_type == 'video':
1650             self.add_extra_info(ie_result, extra_info)
1651             ie_result = self.process_video_result(ie_result, download=download)
1652             self._raise_pending_errors(ie_result)
1653             additional_urls = (ie_result or {}).get('additional_urls')
1654             if additional_urls:
1655                 # TODO: Improve MetadataParserPP to allow setting a list
1656                 if isinstance(additional_urls, str):
1657                     additional_urls = [additional_urls]
1658                 self.to_screen(
1659                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1660                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1661                 ie_result['additional_entries'] = [
1662                     self.extract_info(
1663                         url, download, extra_info=extra_info,
1664                         force_generic_extractor=self.params.get('force_generic_extractor'))
1665                     for url in additional_urls
1666                 ]
1667             return ie_result
1668         elif result_type == 'url':
1669             # We have to add extra_info to the results because it may be
1670             # contained in a playlist
1671             return self.extract_info(
1672                 ie_result['url'], download,
1673                 ie_key=ie_result.get('ie_key'),
1674                 extra_info=extra_info)
1675         elif result_type == 'url_transparent':
1676             # Use the information from the embedding page
1677             info = self.extract_info(
1678                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1679                 extra_info=extra_info, download=False, process=False)
1680
1681             # extract_info may return None when ignoreerrors is enabled and
1682             # extraction failed with an error, don't crash and return early
1683             # in this case
1684             if not info:
1685                 return info
1686
1687             exempted_fields = {'_type', 'url', 'ie_key'}
1688             if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1689                 # For video clips, the id etc of the clip extractor should be used
1690                 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1691
1692             new_result = info.copy()
1693             new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1694
1695             # Extracted info may not be a video result (i.e.
1696             # info.get('_type', 'video') != video) but rather an url or
1697             # url_transparent. In such cases outer metadata (from ie_result)
1698             # should be propagated to inner one (info). For this to happen
1699             # _type of info should be overridden with url_transparent. This
1700             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1701             if new_result.get('_type') == 'url':
1702                 new_result['_type'] = 'url_transparent'
1703
1704             return self.process_ie_result(
1705                 new_result, download=download, extra_info=extra_info)
1706         elif result_type in ('playlist', 'multi_video'):
1707             # Protect from infinite recursion due to recursively nested playlists
1708             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1709             webpage_url = ie_result.get('webpage_url')  # Playlists maynot have webpage_url
1710             if webpage_url and webpage_url in self._playlist_urls:
1711                 self.to_screen(
1712                     '[download] Skipping already downloaded playlist: %s'
1713                     % ie_result.get('title') or ie_result.get('id'))
1714                 return
1715
1716             self._playlist_level += 1
1717             self._playlist_urls.add(webpage_url)
1718             self._fill_common_fields(ie_result, False)
1719             self._sanitize_thumbnails(ie_result)
1720             try:
1721                 return self.__process_playlist(ie_result, download)
1722             finally:
1723                 self._playlist_level -= 1
1724                 if not self._playlist_level:
1725                     self._playlist_urls.clear()
1726         elif result_type == 'compat_list':
1727             self.report_warning(
1728                 'Extractor %s returned a compat_list result. '
1729                 'It needs to be updated.' % ie_result.get('extractor'))
1730
1731             def _fixup(r):
1732                 self.add_extra_info(r, {
1733                     'extractor': ie_result['extractor'],
1734                     'webpage_url': ie_result['webpage_url'],
1735                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1736                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1737                     'extractor_key': ie_result['extractor_key'],
1738                 })
1739                 return r
1740             ie_result['entries'] = [
1741                 self.process_ie_result(_fixup(r), download, extra_info)
1742                 for r in ie_result['entries']
1743             ]
1744             return ie_result
1745         else:
1746             raise Exception('Invalid result type: %s' % result_type)
1747
1748     def _ensure_dir_exists(self, path):
1749         return make_dir(path, self.report_error)
1750
1751     @staticmethod
1752     def _playlist_infodict(ie_result, strict=False, **kwargs):
1753         info = {
1754             'playlist_count': ie_result.get('playlist_count'),
1755             'playlist': ie_result.get('title') or ie_result.get('id'),
1756             'playlist_id': ie_result.get('id'),
1757             'playlist_title': ie_result.get('title'),
1758             'playlist_uploader': ie_result.get('uploader'),
1759             'playlist_uploader_id': ie_result.get('uploader_id'),
1760             **kwargs,
1761         }
1762         if strict:
1763             return info
1764         if ie_result.get('webpage_url'):
1765             info.update({
1766                 'webpage_url': ie_result['webpage_url'],
1767                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1768                 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1769             })
1770         return {
1771             **info,
1772             'playlist_index': 0,
1773             '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
1774             'extractor': ie_result['extractor'],
1775             'extractor_key': ie_result['extractor_key'],
1776         }
1777
1778     def __process_playlist(self, ie_result, download):
1779         """Process each entry in the playlist"""
1780         assert ie_result['_type'] in ('playlist', 'multi_video')
1781
1782         common_info = self._playlist_infodict(ie_result, strict=True)
1783         title = common_info.get('playlist') or '<Untitled>'
1784         if self._match_entry(common_info, incomplete=True) is not None:
1785             return
1786         self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
1787
1788         all_entries = PlaylistEntries(self, ie_result)
1789         entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1790
1791         lazy = self.params.get('lazy_playlist')
1792         if lazy:
1793             resolved_entries, n_entries = [], 'N/A'
1794             ie_result['requested_entries'], ie_result['entries'] = None, None
1795         else:
1796             entries = resolved_entries = list(entries)
1797             n_entries = len(resolved_entries)
1798             ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1799         if not ie_result.get('playlist_count'):
1800             # Better to do this after potentially exhausting entries
1801             ie_result['playlist_count'] = all_entries.get_full_count()
1802
1803         extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1804         ie_copy = collections.ChainMap(ie_result, extra)
1805
1806         _infojson_written = False
1807         write_playlist_files = self.params.get('allow_playlist_files', True)
1808         if write_playlist_files and self.params.get('list_thumbnails'):
1809             self.list_thumbnails(ie_result)
1810         if write_playlist_files and not self.params.get('simulate'):
1811             _infojson_written = self._write_info_json(
1812                 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1813             if _infojson_written is None:
1814                 return
1815             if self._write_description('playlist', ie_result,
1816                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1817                 return
1818             # TODO: This should be passed to ThumbnailsConvertor if necessary
1819             self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1820
1821         if lazy:
1822             if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1823                 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1824         elif self.params.get('playlistreverse'):
1825             entries.reverse()
1826         elif self.params.get('playlistrandom'):
1827             random.shuffle(entries)
1828
1829         self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
1830                        f'{format_field(ie_result, "playlist_count", " of %s")}')
1831
1832         keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1833         if self.params.get('extract_flat') == 'discard_in_playlist':
1834             keep_resolved_entries = ie_result['_type'] != 'playlist'
1835         if keep_resolved_entries:
1836             self.write_debug('The information of all playlist entries will be held in memory')
1837
1838         failures = 0
1839         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1840         for i, (playlist_index, entry) in enumerate(entries):
1841             if lazy:
1842                 resolved_entries.append((playlist_index, entry))
1843             if not entry:
1844                 continue
1845
1846             entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
1847             if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
1848                 playlist_index = ie_result['requested_entries'][i]
1849
1850             entry_copy = collections.ChainMap(entry, {
1851                 **common_info,
1852                 'n_entries': int_or_none(n_entries),
1853                 'playlist_index': playlist_index,
1854                 'playlist_autonumber': i + 1,
1855             })
1856
1857             if self._match_entry(entry_copy, incomplete=True) is not None:
1858                 # For compatabilty with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
1859                 resolved_entries[i] = (playlist_index, NO_DEFAULT)
1860                 continue
1861
1862             self.to_screen('[download] Downloading item %s of %s' % (
1863                 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1864
1865             extra.update({
1866                 'playlist_index': playlist_index,
1867                 'playlist_autonumber': i + 1,
1868             })
1869             entry_result = self.__process_iterable_entry(entry, download, extra)
1870             if not entry_result:
1871                 failures += 1
1872             if failures >= max_failures:
1873                 self.report_error(
1874                     f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
1875                 break
1876             if keep_resolved_entries:
1877                 resolved_entries[i] = (playlist_index, entry_result)
1878
1879         # Update with processed data
1880         ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
1881         ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
1882         if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
1883             # Do not set for full playlist
1884             ie_result.pop('requested_entries')
1885
1886         # Write the updated info to json
1887         if _infojson_written is True and self._write_info_json(
1888                 'updated playlist', ie_result,
1889                 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1890             return
1891
1892         ie_result = self.run_all_pps('playlist', ie_result)
1893         self.to_screen(f'[download] Finished downloading playlist: {title}')
1894         return ie_result
1895
1896     @_handle_extraction_exceptions
1897     def __process_iterable_entry(self, entry, download, extra_info):
1898         return self.process_ie_result(
1899             entry, download=download, extra_info=extra_info)
1900
1901     def _build_format_filter(self, filter_spec):
1902         " Returns a function to filter the formats according to the filter_spec "
1903
1904         OPERATORS = {
1905             '<': operator.lt,
1906             '<=': operator.le,
1907             '>': operator.gt,
1908             '>=': operator.ge,
1909             '=': operator.eq,
1910             '!=': operator.ne,
1911         }
1912         operator_rex = re.compile(r'''(?x)\s*
1913             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1914             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1915             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1916             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1917         m = operator_rex.fullmatch(filter_spec)
1918         if m:
1919             try:
1920                 comparison_value = int(m.group('value'))
1921             except ValueError:
1922                 comparison_value = parse_filesize(m.group('value'))
1923                 if comparison_value is None:
1924                     comparison_value = parse_filesize(m.group('value') + 'B')
1925                 if comparison_value is None:
1926                     raise ValueError(
1927                         'Invalid value %r in format specification %r' % (
1928                             m.group('value'), filter_spec))
1929             op = OPERATORS[m.group('op')]
1930
1931         if not m:
1932             STR_OPERATORS = {
1933                 '=': operator.eq,
1934                 '^=': lambda attr, value: attr.startswith(value),
1935                 '$=': lambda attr, value: attr.endswith(value),
1936                 '*=': lambda attr, value: value in attr,
1937                 '~=': lambda attr, value: value.search(attr) is not None
1938             }
1939             str_operator_rex = re.compile(r'''(?x)\s*
1940                 (?P<key>[a-zA-Z0-9._-]+)\s*
1941                 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1942                 (?P<quote>["'])?
1943                 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1944                 (?(quote)(?P=quote))\s*
1945                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1946             m = str_operator_rex.fullmatch(filter_spec)
1947             if m:
1948                 if m.group('op') == '~=':
1949                     comparison_value = re.compile(m.group('value'))
1950                 else:
1951                     comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1952                 str_op = STR_OPERATORS[m.group('op')]
1953                 if m.group('negation'):
1954                     op = lambda attr, value: not str_op(attr, value)
1955                 else:
1956                     op = str_op
1957
1958         if not m:
1959             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1960
1961         def _filter(f):
1962             actual_value = f.get(m.group('key'))
1963             if actual_value is None:
1964                 return m.group('none_inclusive')
1965             return op(actual_value, comparison_value)
1966         return _filter
1967
1968     def _check_formats(self, formats):
1969         for f in formats:
1970             self.to_screen('[info] Testing format %s' % f['format_id'])
1971             path = self.get_output_path('temp')
1972             if not self._ensure_dir_exists(f'{path}/'):
1973                 continue
1974             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1975             temp_file.close()
1976             try:
1977                 success, _ = self.dl(temp_file.name, f, test=True)
1978             except (DownloadError, OSError, ValueError) + network_exceptions:
1979                 success = False
1980             finally:
1981                 if os.path.exists(temp_file.name):
1982                     try:
1983                         os.remove(temp_file.name)
1984                     except OSError:
1985                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1986             if success:
1987                 yield f
1988             else:
1989                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1990
1991     def _default_format_spec(self, info_dict, download=True):
1992
1993         def can_merge():
1994             merger = FFmpegMergerPP(self)
1995             return merger.available and merger.can_merge()
1996
1997         prefer_best = (
1998             not self.params.get('simulate')
1999             and download
2000             and (
2001                 not can_merge()
2002                 or info_dict.get('is_live') and not self.params.get('live_from_start')
2003                 or self.params['outtmpl']['default'] == '-'))
2004         compat = (
2005             prefer_best
2006             or self.params.get('allow_multiple_audio_streams', False)
2007             or 'format-spec' in self.params['compat_opts'])
2008
2009         return (
2010             'best/bestvideo+bestaudio' if prefer_best
2011             else 'bestvideo*+bestaudio/best' if not compat
2012             else 'bestvideo+bestaudio/best')
2013
2014     def build_format_selector(self, format_spec):
2015         def syntax_error(note, start):
2016             message = (
2017                 'Invalid format specification: '
2018                 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2019             return SyntaxError(message)
2020
2021         PICKFIRST = 'PICKFIRST'
2022         MERGE = 'MERGE'
2023         SINGLE = 'SINGLE'
2024         GROUP = 'GROUP'
2025         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2026
2027         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2028                                   'video': self.params.get('allow_multiple_video_streams', False)}
2029
2030         check_formats = self.params.get('check_formats') == 'selected'
2031
2032         def _parse_filter(tokens):
2033             filter_parts = []
2034             for type, string, start, _, _ in tokens:
2035                 if type == tokenize.OP and string == ']':
2036                     return ''.join(filter_parts)
2037                 else:
2038                     filter_parts.append(string)
2039
2040         def _remove_unused_ops(tokens):
2041             # Remove operators that we don't use and join them with the surrounding strings.
2042             # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2043             ALLOWED_OPS = ('/', '+', ',', '(', ')')
2044             last_string, last_start, last_end, last_line = None, None, None, None
2045             for type, string, start, end, line in tokens:
2046                 if type == tokenize.OP and string == '[':
2047                     if last_string:
2048                         yield tokenize.NAME, last_string, last_start, last_end, last_line
2049                         last_string = None
2050                     yield type, string, start, end, line
2051                     # everything inside brackets will be handled by _parse_filter
2052                     for type, string, start, end, line in tokens:
2053                         yield type, string, start, end, line
2054                         if type == tokenize.OP and string == ']':
2055                             break
2056                 elif type == tokenize.OP and string in ALLOWED_OPS:
2057                     if last_string:
2058                         yield tokenize.NAME, last_string, last_start, last_end, last_line
2059                         last_string = None
2060                     yield type, string, start, end, line
2061                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2062                     if not last_string:
2063                         last_string = string
2064                         last_start = start
2065                         last_end = end
2066                     else:
2067                         last_string += string
2068             if last_string:
2069                 yield tokenize.NAME, last_string, last_start, last_end, last_line
2070
2071         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2072             selectors = []
2073             current_selector = None
2074             for type, string, start, _, _ in tokens:
2075                 # ENCODING is only defined in python 3.x
2076                 if type == getattr(tokenize, 'ENCODING', None):
2077                     continue
2078                 elif type in [tokenize.NAME, tokenize.NUMBER]:
2079                     current_selector = FormatSelector(SINGLE, string, [])
2080                 elif type == tokenize.OP:
2081                     if string == ')':
2082                         if not inside_group:
2083                             # ')' will be handled by the parentheses group
2084                             tokens.restore_last_token()
2085                         break
2086                     elif inside_merge and string in ['/', ',']:
2087                         tokens.restore_last_token()
2088                         break
2089                     elif inside_choice and string == ',':
2090                         tokens.restore_last_token()
2091                         break
2092                     elif string == ',':
2093                         if not current_selector:
2094                             raise syntax_error('"," must follow a format selector', start)
2095                         selectors.append(current_selector)
2096                         current_selector = None
2097                     elif string == '/':
2098                         if not current_selector:
2099                             raise syntax_error('"/" must follow a format selector', start)
2100                         first_choice = current_selector
2101                         second_choice = _parse_format_selection(tokens, inside_choice=True)
2102                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2103                     elif string == '[':
2104                         if not current_selector:
2105                             current_selector = FormatSelector(SINGLE, 'best', [])
2106                         format_filter = _parse_filter(tokens)
2107                         current_selector.filters.append(format_filter)
2108                     elif string == '(':
2109                         if current_selector:
2110                             raise syntax_error('Unexpected "("', start)
2111                         group = _parse_format_selection(tokens, inside_group=True)
2112                         current_selector = FormatSelector(GROUP, group, [])
2113                     elif string == '+':
2114                         if not current_selector:
2115                             raise syntax_error('Unexpected "+"', start)
2116                         selector_1 = current_selector
2117                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2118                         if not selector_2:
2119                             raise syntax_error('Expected a selector', start)
2120                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2121                     else:
2122                         raise syntax_error(f'Operator not recognized: "{string}"', start)
2123                 elif type == tokenize.ENDMARKER:
2124                     break
2125             if current_selector:
2126                 selectors.append(current_selector)
2127             return selectors
2128
2129         def _merge(formats_pair):
2130             format_1, format_2 = formats_pair
2131
2132             formats_info = []
2133             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2134             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2135
2136             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2137                 get_no_more = {'video': False, 'audio': False}
2138                 for (i, fmt_info) in enumerate(formats_info):
2139                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2140                         formats_info.pop(i)
2141                         continue
2142                     for aud_vid in ['audio', 'video']:
2143                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2144                             if get_no_more[aud_vid]:
2145                                 formats_info.pop(i)
2146                                 break
2147                             get_no_more[aud_vid] = True
2148
2149             if len(formats_info) == 1:
2150                 return formats_info[0]
2151
2152             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2153             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2154
2155             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2156             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2157
2158             output_ext = get_compatible_ext(
2159                 vcodecs=[f.get('vcodec') for f in video_fmts],
2160                 acodecs=[f.get('acodec') for f in audio_fmts],
2161                 vexts=[f['ext'] for f in video_fmts],
2162                 aexts=[f['ext'] for f in audio_fmts],
2163                 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2164                              or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2165
2166             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2167
2168             new_dict = {
2169                 'requested_formats': formats_info,
2170                 'format': '+'.join(filtered('format')),
2171                 'format_id': '+'.join(filtered('format_id')),
2172                 'ext': output_ext,
2173                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2174                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2175                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2176                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2177                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2178             }
2179
2180             if the_only_video:
2181                 new_dict.update({
2182                     'width': the_only_video.get('width'),
2183                     'height': the_only_video.get('height'),
2184                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2185                     'fps': the_only_video.get('fps'),
2186                     'dynamic_range': the_only_video.get('dynamic_range'),
2187                     'vcodec': the_only_video.get('vcodec'),
2188                     'vbr': the_only_video.get('vbr'),
2189                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2190                     'aspect_ratio': the_only_video.get('aspect_ratio'),
2191                 })
2192
2193             if the_only_audio:
2194                 new_dict.update({
2195                     'acodec': the_only_audio.get('acodec'),
2196                     'abr': the_only_audio.get('abr'),
2197                     'asr': the_only_audio.get('asr'),
2198                     'audio_channels': the_only_audio.get('audio_channels')
2199                 })
2200
2201             return new_dict
2202
2203         def _check_formats(formats):
2204             if not check_formats:
2205                 yield from formats
2206                 return
2207             yield from self._check_formats(formats)
2208
2209         def _build_selector_function(selector):
2210             if isinstance(selector, list):  # ,
2211                 fs = [_build_selector_function(s) for s in selector]
2212
2213                 def selector_function(ctx):
2214                     for f in fs:
2215                         yield from f(ctx)
2216                 return selector_function
2217
2218             elif selector.type == GROUP:  # ()
2219                 selector_function = _build_selector_function(selector.selector)
2220
2221             elif selector.type == PICKFIRST:  # /
2222                 fs = [_build_selector_function(s) for s in selector.selector]
2223
2224                 def selector_function(ctx):
2225                     for f in fs:
2226                         picked_formats = list(f(ctx))
2227                         if picked_formats:
2228                             return picked_formats
2229                     return []
2230
2231             elif selector.type == MERGE:  # +
2232                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2233
2234                 def selector_function(ctx):
2235                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2236                         yield _merge(pair)
2237
2238             elif selector.type == SINGLE:  # atom
2239                 format_spec = selector.selector or 'best'
2240
2241                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2242                 if format_spec == 'all':
2243                     def selector_function(ctx):
2244                         yield from _check_formats(ctx['formats'][::-1])
2245                 elif format_spec == 'mergeall':
2246                     def selector_function(ctx):
2247                         formats = list(_check_formats(
2248                             f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2249                         if not formats:
2250                             return
2251                         merged_format = formats[-1]
2252                         for f in formats[-2::-1]:
2253                             merged_format = _merge((merged_format, f))
2254                         yield merged_format
2255
2256                 else:
2257                     format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
2258                     mobj = re.match(
2259                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2260                         format_spec)
2261                     if mobj is not None:
2262                         format_idx = int_or_none(mobj.group('n'), default=1)
2263                         format_reverse = mobj.group('bw')[0] == 'b'
2264                         format_type = (mobj.group('type') or [None])[0]
2265                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2266                         format_modified = mobj.group('mod') is not None
2267
2268                         format_fallback = not format_type and not format_modified  # for b, w
2269                         _filter_f = (
2270                             (lambda f: f.get('%scodec' % format_type) != 'none')
2271                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2272                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2273                             if format_type  # bv, ba, wv, wa
2274                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2275                             if not format_modified  # b, w
2276                             else lambda f: True)  # b*, w*
2277                         filter_f = lambda f: _filter_f(f) and (
2278                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2279                     else:
2280                         if format_spec in self._format_selection_exts['audio']:
2281                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2282                         elif format_spec in self._format_selection_exts['video']:
2283                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2284                             seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2285                         elif format_spec in self._format_selection_exts['storyboards']:
2286                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2287                         else:
2288                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2289
2290                     def selector_function(ctx):
2291                         formats = list(ctx['formats'])
2292                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2293                         if not matches:
2294                             if format_fallback and ctx['incomplete_formats']:
2295                                 # for extractors with incomplete formats (audio only (soundcloud)
2296                                 # or video only (imgur)) best/worst will fallback to
2297                                 # best/worst {video,audio}-only format
2298                                 matches = formats
2299                             elif seperate_fallback and not ctx['has_merged_format']:
2300                                 # for compatibility with youtube-dl when there is no pre-merged format
2301                                 matches = list(filter(seperate_fallback, formats))
2302                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2303                         try:
2304                             yield matches[format_idx - 1]
2305                         except LazyList.IndexError:
2306                             return
2307
2308             filters = [self._build_format_filter(f) for f in selector.filters]
2309
2310             def final_selector(ctx):
2311                 ctx_copy = dict(ctx)
2312                 for _filter in filters:
2313                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2314                 return selector_function(ctx_copy)
2315             return final_selector
2316
2317         stream = io.BytesIO(format_spec.encode())
2318         try:
2319             tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2320         except tokenize.TokenError:
2321             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2322
2323         class TokenIterator:
2324             def __init__(self, tokens):
2325                 self.tokens = tokens
2326                 self.counter = 0
2327
2328             def __iter__(self):
2329                 return self
2330
2331             def __next__(self):
2332                 if self.counter >= len(self.tokens):
2333                     raise StopIteration()
2334                 value = self.tokens[self.counter]
2335                 self.counter += 1
2336                 return value
2337
2338             next = __next__
2339
2340             def restore_last_token(self):
2341                 self.counter -= 1
2342
2343         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2344         return _build_selector_function(parsed_selector)
2345
2346     def _calc_headers(self, info_dict):
2347         res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2348
2349         cookies = self._calc_cookies(info_dict['url'])
2350         if cookies:
2351             res['Cookie'] = cookies
2352
2353         if 'X-Forwarded-For' not in res:
2354             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2355             if x_forwarded_for_ip:
2356                 res['X-Forwarded-For'] = x_forwarded_for_ip
2357
2358         return res
2359
2360     def _calc_cookies(self, url):
2361         pr = sanitized_Request(url)
2362         self.cookiejar.add_cookie_header(pr)
2363         return pr.get_header('Cookie')
2364
2365     def _sort_thumbnails(self, thumbnails):
2366         thumbnails.sort(key=lambda t: (
2367             t.get('preference') if t.get('preference') is not None else -1,
2368             t.get('width') if t.get('width') is not None else -1,
2369             t.get('height') if t.get('height') is not None else -1,
2370             t.get('id') if t.get('id') is not None else '',
2371             t.get('url')))
2372
2373     def _sanitize_thumbnails(self, info_dict):
2374         thumbnails = info_dict.get('thumbnails')
2375         if thumbnails is None:
2376             thumbnail = info_dict.get('thumbnail')
2377             if thumbnail:
2378                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2379         if not thumbnails:
2380             return
2381
2382         def check_thumbnails(thumbnails):
2383             for t in thumbnails:
2384                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2385                 try:
2386                     self.urlopen(HEADRequest(t['url']))
2387                 except network_exceptions as err:
2388                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2389                     continue
2390                 yield t
2391
2392         self._sort_thumbnails(thumbnails)
2393         for i, t in enumerate(thumbnails):
2394             if t.get('id') is None:
2395                 t['id'] = '%d' % i
2396             if t.get('width') and t.get('height'):
2397                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2398             t['url'] = sanitize_url(t['url'])
2399
2400         if self.params.get('check_formats') is True:
2401             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2402         else:
2403             info_dict['thumbnails'] = thumbnails
2404
2405     def _fill_common_fields(self, info_dict, final=True):
2406         # TODO: move sanitization here
2407         if final:
2408             title = info_dict.get('title', NO_DEFAULT)
2409             if title is NO_DEFAULT:
2410                 raise ExtractorError('Missing "title" field in extractor result',
2411                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2412             info_dict['fulltitle'] = title
2413             if not title:
2414                 if title == '':
2415                     self.write_debug('Extractor gave empty title. Creating a generic title')
2416                 else:
2417                     self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2418                 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2419
2420         if info_dict.get('duration') is not None:
2421             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2422
2423         for ts_key, date_key in (
2424                 ('timestamp', 'upload_date'),
2425                 ('release_timestamp', 'release_date'),
2426                 ('modified_timestamp', 'modified_date'),
2427         ):
2428             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2429                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2430                 # see http://bugs.python.org/issue1646728)
2431                 with contextlib.suppress(ValueError, OverflowError, OSError):
2432                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2433                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2434
2435         live_keys = ('is_live', 'was_live')
2436         live_status = info_dict.get('live_status')
2437         if live_status is None:
2438             for key in live_keys:
2439                 if info_dict.get(key) is False:
2440                     continue
2441                 if info_dict.get(key):
2442                     live_status = key
2443                 break
2444             if all(info_dict.get(key) is False for key in live_keys):
2445                 live_status = 'not_live'
2446         if live_status:
2447             info_dict['live_status'] = live_status
2448             for key in live_keys:
2449                 if info_dict.get(key) is None:
2450                     info_dict[key] = (live_status == key)
2451         if live_status == 'post_live':
2452             info_dict['was_live'] = True
2453
2454         # Auto generate title fields corresponding to the *_number fields when missing
2455         # in order to always have clean titles. This is very common for TV series.
2456         for field in ('chapter', 'season', 'episode'):
2457             if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2458                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2459
2460     def _raise_pending_errors(self, info):
2461         err = info.pop('__pending_error', None)
2462         if err:
2463             self.report_error(err, tb=False)
2464
2465     def sort_formats(self, info_dict):
2466         formats = self._get_formats(info_dict)
2467         if not formats:
2468             return
2469         # Backward compatibility with InfoExtractor._sort_formats
2470         field_preference = formats[0].pop('__sort_fields', None)
2471         if field_preference:
2472             info_dict['_format_sort_fields'] = field_preference
2473
2474         formats.sort(key=FormatSorter(
2475             self, info_dict.get('_format_sort_fields', [])).calculate_preference)
2476
2477     def process_video_result(self, info_dict, download=True):
2478         assert info_dict.get('_type', 'video') == 'video'
2479         self._num_videos += 1
2480
2481         if 'id' not in info_dict:
2482             raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2483         elif not info_dict.get('id'):
2484             raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2485
2486         def report_force_conversion(field, field_not, conversion):
2487             self.report_warning(
2488                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2489                 % (field, field_not, conversion))
2490
2491         def sanitize_string_field(info, string_field):
2492             field = info.get(string_field)
2493             if field is None or isinstance(field, str):
2494                 return
2495             report_force_conversion(string_field, 'a string', 'string')
2496             info[string_field] = str(field)
2497
2498         def sanitize_numeric_fields(info):
2499             for numeric_field in self._NUMERIC_FIELDS:
2500                 field = info.get(numeric_field)
2501                 if field is None or isinstance(field, (int, float)):
2502                     continue
2503                 report_force_conversion(numeric_field, 'numeric', 'int')
2504                 info[numeric_field] = int_or_none(field)
2505
2506         sanitize_string_field(info_dict, 'id')
2507         sanitize_numeric_fields(info_dict)
2508         if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2509             info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2510         if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2511             self.report_warning('"duration" field is negative, there is an error in extractor')
2512
2513         chapters = info_dict.get('chapters') or []
2514         if chapters and chapters[0].get('start_time'):
2515             chapters.insert(0, {'start_time': 0})
2516
2517         dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2518         for idx, (prev, current, next_) in enumerate(zip(
2519                 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2520             if current.get('start_time') is None:
2521                 current['start_time'] = prev.get('end_time')
2522             if not current.get('end_time'):
2523                 current['end_time'] = next_.get('start_time')
2524             if not current.get('title'):
2525                 current['title'] = f'<Untitled Chapter {idx}>'
2526
2527         if 'playlist' not in info_dict:
2528             # It isn't part of a playlist
2529             info_dict['playlist'] = None
2530             info_dict['playlist_index'] = None
2531
2532         self._sanitize_thumbnails(info_dict)
2533
2534         thumbnail = info_dict.get('thumbnail')
2535         thumbnails = info_dict.get('thumbnails')
2536         if thumbnail:
2537             info_dict['thumbnail'] = sanitize_url(thumbnail)
2538         elif thumbnails:
2539             info_dict['thumbnail'] = thumbnails[-1]['url']
2540
2541         if info_dict.get('display_id') is None and 'id' in info_dict:
2542             info_dict['display_id'] = info_dict['id']
2543
2544         self._fill_common_fields(info_dict)
2545
2546         for cc_kind in ('subtitles', 'automatic_captions'):
2547             cc = info_dict.get(cc_kind)
2548             if cc:
2549                 for _, subtitle in cc.items():
2550                     for subtitle_format in subtitle:
2551                         if subtitle_format.get('url'):
2552                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2553                         if subtitle_format.get('ext') is None:
2554                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2555
2556         automatic_captions = info_dict.get('automatic_captions')
2557         subtitles = info_dict.get('subtitles')
2558
2559         info_dict['requested_subtitles'] = self.process_subtitles(
2560             info_dict['id'], subtitles, automatic_captions)
2561
2562         self.sort_formats(info_dict)
2563         formats = self._get_formats(info_dict)
2564
2565         # or None ensures --clean-infojson removes it
2566         info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
2567         if not self.params.get('allow_unplayable_formats'):
2568             formats = [f for f in formats if not f.get('has_drm')]
2569
2570         if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2571             self.report_warning(
2572                 f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2573                 'only images are available for download. Use --list-formats to see them'.capitalize())
2574
2575         get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2576         if not get_from_start:
2577             info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2578         if info_dict.get('is_live') and formats:
2579             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2580             if get_from_start and not formats:
2581                 self.raise_no_formats(info_dict, msg=(
2582                     '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2583                     'If you want to download from the current time, use --no-live-from-start'))
2584
2585         def is_wellformed(f):
2586             url = f.get('url')
2587             if not url:
2588                 self.report_warning(
2589                     '"url" field is missing or empty - skipping format, '
2590                     'there is an error in extractor')
2591                 return False
2592             if isinstance(url, bytes):
2593                 sanitize_string_field(f, 'url')
2594             return True
2595
2596         # Filter out malformed formats for better extraction robustness
2597         formats = list(filter(is_wellformed, formats or []))
2598
2599         if not formats:
2600             self.raise_no_formats(info_dict)
2601
2602         formats_dict = {}
2603
2604         # We check that all the formats have the format and format_id fields
2605         for i, format in enumerate(formats):
2606             sanitize_string_field(format, 'format_id')
2607             sanitize_numeric_fields(format)
2608             format['url'] = sanitize_url(format['url'])
2609             if not format.get('format_id'):
2610                 format['format_id'] = str(i)
2611             else:
2612                 # Sanitize format_id from characters used in format selector expression
2613                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2614             format_id = format['format_id']
2615             if format_id not in formats_dict:
2616                 formats_dict[format_id] = []
2617             formats_dict[format_id].append(format)
2618
2619         # Make sure all formats have unique format_id
2620         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2621         for format_id, ambiguous_formats in formats_dict.items():
2622             ambigious_id = len(ambiguous_formats) > 1
2623             for i, format in enumerate(ambiguous_formats):
2624                 if ambigious_id:
2625                     format['format_id'] = '%s-%d' % (format_id, i)
2626                 if format.get('ext') is None:
2627                     format['ext'] = determine_ext(format['url']).lower()
2628                 # Ensure there is no conflict between id and ext in format selection
2629                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2630                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2631                     format['format_id'] = 'f%s' % format['format_id']
2632
2633         for i, format in enumerate(formats):
2634             if format.get('format') is None:
2635                 format['format'] = '{id} - {res}{note}'.format(
2636                     id=format['format_id'],
2637                     res=self.format_resolution(format),
2638                     note=format_field(format, 'format_note', ' (%s)'),
2639                 )
2640             if format.get('protocol') is None:
2641                 format['protocol'] = determine_protocol(format)
2642             if format.get('resolution') is None:
2643                 format['resolution'] = self.format_resolution(format, default=None)
2644             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2645                 format['dynamic_range'] = 'SDR'
2646             if format.get('aspect_ratio') is None:
2647                 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
2648             if (info_dict.get('duration') and format.get('tbr')
2649                     and not format.get('filesize') and not format.get('filesize_approx')):
2650                 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
2651
2652             # Add HTTP headers, so that external programs can use them from the
2653             # json output
2654             full_format_info = info_dict.copy()
2655             full_format_info.update(format)
2656             format['http_headers'] = self._calc_headers(full_format_info)
2657         # Remove private housekeeping stuff
2658         if '__x_forwarded_for_ip' in info_dict:
2659             del info_dict['__x_forwarded_for_ip']
2660
2661         if self.params.get('check_formats') is True:
2662             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2663
2664         if not formats or formats[0] is not info_dict:
2665             # only set the 'formats' fields if the original info_dict list them
2666             # otherwise we end up with a circular reference, the first (and unique)
2667             # element in the 'formats' field in info_dict is info_dict itself,
2668             # which can't be exported to json
2669             info_dict['formats'] = formats
2670
2671         info_dict, _ = self.pre_process(info_dict)
2672
2673         if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2674             return info_dict
2675
2676         self.post_extract(info_dict)
2677         info_dict, _ = self.pre_process(info_dict, 'after_filter')
2678
2679         # The pre-processors may have modified the formats
2680         formats = self._get_formats(info_dict)
2681
2682         list_only = self.params.get('simulate') == 'list_only'
2683         interactive_format_selection = not list_only and self.format_selector == '-'
2684         if self.params.get('list_thumbnails'):
2685             self.list_thumbnails(info_dict)
2686         if self.params.get('listsubtitles'):
2687             if 'automatic_captions' in info_dict:
2688                 self.list_subtitles(
2689                     info_dict['id'], automatic_captions, 'automatic captions')
2690             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2691         if self.params.get('listformats') or interactive_format_selection:
2692             self.list_formats(info_dict)
2693         if list_only:
2694             # Without this printing, -F --print-json will not work
2695             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2696             return info_dict
2697
2698         format_selector = self.format_selector
2699         if format_selector is None:
2700             req_format = self._default_format_spec(info_dict, download=download)
2701             self.write_debug('Default format spec: %s' % req_format)
2702             format_selector = self.build_format_selector(req_format)
2703
2704         while True:
2705             if interactive_format_selection:
2706                 req_format = input(
2707                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2708                 try:
2709                     format_selector = self.build_format_selector(req_format)
2710                 except SyntaxError as err:
2711                     self.report_error(err, tb=False, is_error=False)
2712                     continue
2713
2714             formats_to_download = list(format_selector({
2715                 'formats': formats,
2716                 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2717                 'incomplete_formats': (
2718                     # All formats are video-only or
2719                     all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2720                     # all formats are audio-only
2721                     or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2722             }))
2723             if interactive_format_selection and not formats_to_download:
2724                 self.report_error('Requested format is not available', tb=False, is_error=False)
2725                 continue
2726             break
2727
2728         if not formats_to_download:
2729             if not self.params.get('ignore_no_formats_error'):
2730                 raise ExtractorError(
2731                     'Requested format is not available. Use --list-formats for a list of available formats',
2732                     expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2733             self.report_warning('Requested format is not available')
2734             # Process what we can, even without any available formats.
2735             formats_to_download = [{}]
2736
2737         requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
2738         best_format, downloaded_formats = formats_to_download[-1], []
2739         if download:
2740             if best_format and requested_ranges:
2741                 def to_screen(*msg):
2742                     self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2743
2744                 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2745                           (f['format_id'] for f in formats_to_download))
2746                 if requested_ranges != ({}, ):
2747                     to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2748                               (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
2749             max_downloads_reached = False
2750
2751             for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
2752                 new_info = self._copy_infodict(info_dict)
2753                 new_info.update(fmt)
2754                 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2755                 end_time = offset + min(chapter.get('end_time', duration), duration)
2756                 if chapter or offset:
2757                     new_info.update({
2758                         'section_start': offset + chapter.get('start_time', 0),
2759                         # duration may not be accurate. So allow deviations <1sec
2760                         'section_end': end_time if end_time <= offset + duration + 1 else None,
2761                         'section_title': chapter.get('title'),
2762                         'section_number': chapter.get('index'),
2763                     })
2764                 downloaded_formats.append(new_info)
2765                 try:
2766                     self.process_info(new_info)
2767                 except MaxDownloadsReached:
2768                     max_downloads_reached = True
2769                 self._raise_pending_errors(new_info)
2770                 # Remove copied info
2771                 for key, val in tuple(new_info.items()):
2772                     if info_dict.get(key) == val:
2773                         new_info.pop(key)
2774                 if max_downloads_reached:
2775                     break
2776
2777             write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2778             assert write_archive.issubset({True, False, 'ignore'})
2779             if True in write_archive and False not in write_archive:
2780                 self.record_download_archive(info_dict)
2781
2782             info_dict['requested_downloads'] = downloaded_formats
2783             info_dict = self.run_all_pps('after_video', info_dict)
2784             if max_downloads_reached:
2785                 raise MaxDownloadsReached()
2786
2787         # We update the info dict with the selected best quality format (backwards compatibility)
2788         info_dict.update(best_format)
2789         return info_dict
2790
2791     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2792         """Select the requested subtitles and their format"""
2793         available_subs, normal_sub_langs = {}, []
2794         if normal_subtitles and self.params.get('writesubtitles'):
2795             available_subs.update(normal_subtitles)
2796             normal_sub_langs = tuple(normal_subtitles.keys())
2797         if automatic_captions and self.params.get('writeautomaticsub'):
2798             for lang, cap_info in automatic_captions.items():
2799                 if lang not in available_subs:
2800                     available_subs[lang] = cap_info
2801
2802         if not available_subs or (
2803                 not self.params.get('writesubtitles')
2804                 and not self.params.get('writeautomaticsub')):
2805             return None
2806
2807         all_sub_langs = tuple(available_subs.keys())
2808         if self.params.get('allsubtitles', False):
2809             requested_langs = all_sub_langs
2810         elif self.params.get('subtitleslangs', False):
2811             try:
2812                 requested_langs = orderedSet_from_options(
2813                     self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2814             except re.error as e:
2815                 raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}')
2816         elif normal_sub_langs:
2817             requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
2818         else:
2819             requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
2820         if requested_langs:
2821             self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
2822
2823         formats_query = self.params.get('subtitlesformat', 'best')
2824         formats_preference = formats_query.split('/') if formats_query else []
2825         subs = {}
2826         for lang in requested_langs:
2827             formats = available_subs.get(lang)
2828             if formats is None:
2829                 self.report_warning(f'{lang} subtitles not available for {video_id}')
2830                 continue
2831             for ext in formats_preference:
2832                 if ext == 'best':
2833                     f = formats[-1]
2834                     break
2835                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2836                 if matches:
2837                     f = matches[-1]
2838                     break
2839             else:
2840                 f = formats[-1]
2841                 self.report_warning(
2842                     'No subtitle format found matching "%s" for language %s, '
2843                     'using %s' % (formats_query, lang, f['ext']))
2844             subs[lang] = f
2845         return subs
2846
2847     def _forceprint(self, key, info_dict):
2848         if info_dict is None:
2849             return
2850         info_copy = info_dict.copy()
2851         info_copy['formats_table'] = self.render_formats_table(info_dict)
2852         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2853         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2854         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2855
2856         def format_tmpl(tmpl):
2857             mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
2858             if not mobj:
2859                 return tmpl
2860
2861             fmt = '%({})s'
2862             if tmpl.startswith('{'):
2863                 tmpl = f'.{tmpl}'
2864             if tmpl.endswith('='):
2865                 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
2866             return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
2867
2868         for tmpl in self.params['forceprint'].get(key, []):
2869             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2870
2871         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2872             filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
2873             tmpl = format_tmpl(tmpl)
2874             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2875             if self._ensure_dir_exists(filename):
2876                 with open(filename, 'a', encoding='utf-8') as f:
2877                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2878
2879     def __forced_printings(self, info_dict, filename, incomplete):
2880         def print_mandatory(field, actual_field=None):
2881             if actual_field is None:
2882                 actual_field = field
2883             if (self.params.get('force%s' % field, False)
2884                     and (not incomplete or info_dict.get(actual_field) is not None)):
2885                 self.to_stdout(info_dict[actual_field])
2886
2887         def print_optional(field):
2888             if (self.params.get('force%s' % field, False)
2889                     and info_dict.get(field) is not None):
2890                 self.to_stdout(info_dict[field])
2891
2892         info_dict = info_dict.copy()
2893         if filename is not None:
2894             info_dict['filename'] = filename
2895         if info_dict.get('requested_formats') is not None:
2896             # For RTMP URLs, also include the playpath
2897             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2898         elif info_dict.get('url'):
2899             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2900
2901         if (self.params.get('forcejson')
2902                 or self.params['forceprint'].get('video')
2903                 or self.params['print_to_file'].get('video')):
2904             self.post_extract(info_dict)
2905         self._forceprint('video', info_dict)
2906
2907         print_mandatory('title')
2908         print_mandatory('id')
2909         print_mandatory('url', 'urls')
2910         print_optional('thumbnail')
2911         print_optional('description')
2912         print_optional('filename')
2913         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2914             self.to_stdout(formatSeconds(info_dict['duration']))
2915         print_mandatory('format')
2916
2917         if self.params.get('forcejson'):
2918             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2919
2920     def dl(self, name, info, subtitle=False, test=False):
2921         if not info.get('url'):
2922             self.raise_no_formats(info, True)
2923
2924         if test:
2925             verbose = self.params.get('verbose')
2926             params = {
2927                 'test': True,
2928                 'quiet': self.params.get('quiet') or not verbose,
2929                 'verbose': verbose,
2930                 'noprogress': not verbose,
2931                 'nopart': True,
2932                 'skip_unavailable_fragments': False,
2933                 'keep_fragments': False,
2934                 'overwrites': True,
2935                 '_no_ytdl_file': True,
2936             }
2937         else:
2938             params = self.params
2939         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2940         if not test:
2941             for ph in self._progress_hooks:
2942                 fd.add_progress_hook(ph)
2943             urls = '", "'.join(
2944                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2945                 for f in info.get('requested_formats', []) or [info])
2946             self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
2947
2948         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2949         # But it may contain objects that are not deep-copyable
2950         new_info = self._copy_infodict(info)
2951         if new_info.get('http_headers') is None:
2952             new_info['http_headers'] = self._calc_headers(new_info)
2953         return fd.download(name, new_info, subtitle)
2954
2955     def existing_file(self, filepaths, *, default_overwrite=True):
2956         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2957         if existing_files and not self.params.get('overwrites', default_overwrite):
2958             return existing_files[0]
2959
2960         for file in existing_files:
2961             self.report_file_delete(file)
2962             os.remove(file)
2963         return None
2964
2965     def process_info(self, info_dict):
2966         """Process a single resolved IE result. (Modifies it in-place)"""
2967
2968         assert info_dict.get('_type', 'video') == 'video'
2969         original_infodict = info_dict
2970
2971         if 'format' not in info_dict and 'ext' in info_dict:
2972             info_dict['format'] = info_dict['ext']
2973
2974         if self._match_entry(info_dict) is not None:
2975             info_dict['__write_download_archive'] = 'ignore'
2976             return
2977
2978         # Does nothing under normal operation - for backward compatibility of process_info
2979         self.post_extract(info_dict)
2980
2981         def replace_info_dict(new_info):
2982             nonlocal info_dict
2983             if new_info == info_dict:
2984                 return
2985             info_dict.clear()
2986             info_dict.update(new_info)
2987
2988         new_info, _ = self.pre_process(info_dict, 'video')
2989         replace_info_dict(new_info)
2990         self._num_downloads += 1
2991
2992         # info_dict['_filename'] needs to be set for backward compatibility
2993         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2994         temp_filename = self.prepare_filename(info_dict, 'temp')
2995         files_to_move = {}
2996
2997         # Forced printings
2998         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2999
3000         def check_max_downloads():
3001             if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3002                 raise MaxDownloadsReached()
3003
3004         if self.params.get('simulate'):
3005             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3006             check_max_downloads()
3007             return
3008
3009         if full_filename is None:
3010             return
3011         if not self._ensure_dir_exists(encodeFilename(full_filename)):
3012             return
3013         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
3014             return
3015
3016         if self._write_description('video', info_dict,
3017                                    self.prepare_filename(info_dict, 'description')) is None:
3018             return
3019
3020         sub_files = self._write_subtitles(info_dict, temp_filename)
3021         if sub_files is None:
3022             return
3023         files_to_move.update(dict(sub_files))
3024
3025         thumb_files = self._write_thumbnails(
3026             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3027         if thumb_files is None:
3028             return
3029         files_to_move.update(dict(thumb_files))
3030
3031         infofn = self.prepare_filename(info_dict, 'infojson')
3032         _infojson_written = self._write_info_json('video', info_dict, infofn)
3033         if _infojson_written:
3034             info_dict['infojson_filename'] = infofn
3035             # For backward compatibility, even though it was a private field
3036             info_dict['__infojson_filename'] = infofn
3037         elif _infojson_written is None:
3038             return
3039
3040         # Note: Annotations are deprecated
3041         annofn = None
3042         if self.params.get('writeannotations', False):
3043             annofn = self.prepare_filename(info_dict, 'annotation')
3044         if annofn:
3045             if not self._ensure_dir_exists(encodeFilename(annofn)):
3046                 return
3047             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
3048                 self.to_screen('[info] Video annotations are already present')
3049             elif not info_dict.get('annotations'):
3050                 self.report_warning('There are no annotations to write.')
3051             else:
3052                 try:
3053                     self.to_screen('[info] Writing video annotations to: ' + annofn)
3054                     with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
3055                         annofile.write(info_dict['annotations'])
3056                 except (KeyError, TypeError):
3057                     self.report_warning('There are no annotations to write.')
3058                 except OSError:
3059                     self.report_error('Cannot write annotations file: ' + annofn)
3060                     return
3061
3062         # Write internet shortcut files
3063         def _write_link_file(link_type):
3064             url = try_get(info_dict['webpage_url'], iri_to_uri)
3065             if not url:
3066                 self.report_warning(
3067                     f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3068                 return True
3069             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3070             if not self._ensure_dir_exists(encodeFilename(linkfn)):
3071                 return False
3072             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3073                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3074                 return True
3075             try:
3076                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3077                 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3078                           newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3079                     template_vars = {'url': url}
3080                     if link_type == 'desktop':
3081                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3082                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3083             except OSError:
3084                 self.report_error(f'Cannot write internet shortcut {linkfn}')
3085                 return False
3086             return True
3087
3088         write_links = {
3089             'url': self.params.get('writeurllink'),
3090             'webloc': self.params.get('writewebloclink'),
3091             'desktop': self.params.get('writedesktoplink'),
3092         }
3093         if self.params.get('writelink'):
3094             link_type = ('webloc' if sys.platform == 'darwin'
3095                          else 'desktop' if sys.platform.startswith('linux')
3096                          else 'url')
3097             write_links[link_type] = True
3098
3099         if any(should_write and not _write_link_file(link_type)
3100                for link_type, should_write in write_links.items()):
3101             return
3102
3103         new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3104         replace_info_dict(new_info)
3105
3106         if self.params.get('skip_download'):
3107             info_dict['filepath'] = temp_filename
3108             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3109             info_dict['__files_to_move'] = files_to_move
3110             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3111             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3112         else:
3113             # Download
3114             info_dict.setdefault('__postprocessors', [])
3115             try:
3116
3117                 def existing_video_file(*filepaths):
3118                     ext = info_dict.get('ext')
3119                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3120                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3121                                               default_overwrite=False)
3122                     if file:
3123                         info_dict['ext'] = os.path.splitext(file)[1][1:]
3124                     return file
3125
3126                 fd, success = None, True
3127                 if info_dict.get('protocol') or info_dict.get('url'):
3128                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3129                     if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3130                             info_dict.get('section_start') or info_dict.get('section_end')):
3131                         msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3132                                else 'You have requested downloading the video partially, but ffmpeg is not installed')
3133                         self.report_error(f'{msg}. Aborting')
3134                         return
3135
3136                 if info_dict.get('requested_formats') is not None:
3137                     requested_formats = info_dict['requested_formats']
3138                     old_ext = info_dict['ext']
3139                     if self.params.get('merge_output_format') is None:
3140                         if (info_dict['ext'] == 'webm'
3141                                 and info_dict.get('thumbnails')
3142                                 # check with type instead of pp_key, __name__, or isinstance
3143                                 # since we dont want any custom PPs to trigger this
3144                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
3145                             info_dict['ext'] = 'mkv'
3146                             self.report_warning(
3147                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3148                     new_ext = info_dict['ext']
3149
3150                     def correct_ext(filename, ext=new_ext):
3151                         if filename == '-':
3152                             return filename
3153                         filename_real_ext = os.path.splitext(filename)[1][1:]
3154                         filename_wo_ext = (
3155                             os.path.splitext(filename)[0]
3156                             if filename_real_ext in (old_ext, new_ext)
3157                             else filename)
3158                         return f'{filename_wo_ext}.{ext}'
3159
3160                     # Ensure filename always has a correct extension for successful merge
3161                     full_filename = correct_ext(full_filename)
3162                     temp_filename = correct_ext(temp_filename)
3163                     dl_filename = existing_video_file(full_filename, temp_filename)
3164                     info_dict['__real_download'] = False
3165
3166                     merger = FFmpegMergerPP(self)
3167                     downloaded = []
3168                     if dl_filename is not None:
3169                         self.report_file_already_downloaded(dl_filename)
3170                     elif fd:
3171                         for f in requested_formats if fd != FFmpegFD else []:
3172                             f['filepath'] = fname = prepend_extension(
3173                                 correct_ext(temp_filename, info_dict['ext']),
3174                                 'f%s' % f['format_id'], info_dict['ext'])
3175                             downloaded.append(fname)
3176                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3177                         success, real_download = self.dl(temp_filename, info_dict)
3178                         info_dict['__real_download'] = real_download
3179                     else:
3180                         if self.params.get('allow_unplayable_formats'):
3181                             self.report_warning(
3182                                 'You have requested merging of multiple formats '
3183                                 'while also allowing unplayable formats to be downloaded. '
3184                                 'The formats won\'t be merged to prevent data corruption.')
3185                         elif not merger.available:
3186                             msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3187                             if not self.params.get('ignoreerrors'):
3188                                 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3189                                 return
3190                             self.report_warning(f'{msg}. The formats won\'t be merged')
3191
3192                         if temp_filename == '-':
3193                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3194                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3195                                       else 'but ffmpeg is not installed')
3196                             self.report_warning(
3197                                 f'You have requested downloading multiple formats to stdout {reason}. '
3198                                 'The formats will be streamed one after the other')
3199                             fname = temp_filename
3200                         for f in requested_formats:
3201                             new_info = dict(info_dict)
3202                             del new_info['requested_formats']
3203                             new_info.update(f)
3204                             if temp_filename != '-':
3205                                 fname = prepend_extension(
3206                                     correct_ext(temp_filename, new_info['ext']),
3207                                     'f%s' % f['format_id'], new_info['ext'])
3208                                 if not self._ensure_dir_exists(fname):
3209                                     return
3210                                 f['filepath'] = fname
3211                                 downloaded.append(fname)
3212                             partial_success, real_download = self.dl(fname, new_info)
3213                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3214                             success = success and partial_success
3215
3216                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3217                         info_dict['__postprocessors'].append(merger)
3218                         info_dict['__files_to_merge'] = downloaded
3219                         # Even if there were no downloads, it is being merged only now
3220                         info_dict['__real_download'] = True
3221                     else:
3222                         for file in downloaded:
3223                             files_to_move[file] = None
3224                 else:
3225                     # Just a single file
3226                     dl_filename = existing_video_file(full_filename, temp_filename)
3227                     if dl_filename is None or dl_filename == temp_filename:
3228                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3229                         # So we should try to resume the download
3230                         success, real_download = self.dl(temp_filename, info_dict)
3231                         info_dict['__real_download'] = real_download
3232                     else:
3233                         self.report_file_already_downloaded(dl_filename)
3234
3235                 dl_filename = dl_filename or temp_filename
3236                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3237
3238             except network_exceptions as err:
3239                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3240                 return
3241             except OSError as err:
3242                 raise UnavailableVideoError(err)
3243             except (ContentTooShortError, ) as err:
3244                 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3245                 return
3246
3247             self._raise_pending_errors(info_dict)
3248             if success and full_filename != '-':
3249
3250                 def fixup():
3251                     do_fixup = True
3252                     fixup_policy = self.params.get('fixup')
3253                     vid = info_dict['id']
3254
3255                     if fixup_policy in ('ignore', 'never'):
3256                         return
3257                     elif fixup_policy == 'warn':
3258                         do_fixup = 'warn'
3259                     elif fixup_policy != 'force':
3260                         assert fixup_policy in ('detect_or_warn', None)
3261                         if not info_dict.get('__real_download'):
3262                             do_fixup = False
3263
3264                     def ffmpeg_fixup(cndn, msg, cls):
3265                         if not (do_fixup and cndn):
3266                             return
3267                         elif do_fixup == 'warn':
3268                             self.report_warning(f'{vid}: {msg}')
3269                             return
3270                         pp = cls(self)
3271                         if pp.available:
3272                             info_dict['__postprocessors'].append(pp)
3273                         else:
3274                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3275
3276                     stretched_ratio = info_dict.get('stretched_ratio')
3277                     ffmpeg_fixup(stretched_ratio not in (1, None),
3278                                  f'Non-uniform pixel ratio {stretched_ratio}',
3279                                  FFmpegFixupStretchedPP)
3280
3281                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3282                     downloader = downloader.FD_NAME if downloader else None
3283
3284                     ext = info_dict.get('ext')
3285                     postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3286                         isinstance(pp, FFmpegVideoConvertorPP)
3287                         and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3288                     ) for pp in self._pps['post_process'])
3289
3290                     if not postprocessed_by_ffmpeg:
3291                         ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
3292                                      'writing DASH m4a. Only some players support this container',
3293                                      FFmpegFixupM4aPP)
3294                         ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3295                                      or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3296                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3297                                      FFmpegFixupM3u8PP)
3298                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3299                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3300
3301                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3302                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3303
3304                 fixup()
3305                 try:
3306                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3307                 except PostProcessingError as err:
3308                     self.report_error('Postprocessing: %s' % str(err))
3309                     return
3310                 try:
3311                     for ph in self._post_hooks:
3312                         ph(info_dict['filepath'])
3313                 except Exception as err:
3314                     self.report_error('post hooks: %s' % str(err))
3315                     return
3316                 info_dict['__write_download_archive'] = True
3317
3318         assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
3319         if self.params.get('force_write_download_archive'):
3320             info_dict['__write_download_archive'] = True
3321         check_max_downloads()
3322
3323     def __download_wrapper(self, func):
3324         @functools.wraps(func)
3325         def wrapper(*args, **kwargs):
3326             try:
3327                 res = func(*args, **kwargs)
3328             except UnavailableVideoError as e:
3329                 self.report_error(e)
3330             except DownloadCancelled as e:
3331                 self.to_screen(f'[info] {e}')
3332                 if not self.params.get('break_per_url'):
3333                     raise
3334                 self._num_downloads = 0
3335             else:
3336                 if self.params.get('dump_single_json', False):
3337                     self.post_extract(res)
3338                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3339         return wrapper
3340
3341     def download(self, url_list):
3342         """Download a given list of URLs."""
3343         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3344         outtmpl = self.params['outtmpl']['default']
3345         if (len(url_list) > 1
3346                 and outtmpl != '-'
3347                 and '%' not in outtmpl
3348                 and self.params.get('max_downloads') != 1):
3349             raise SameFileError(outtmpl)
3350
3351         for url in url_list:
3352             self.__download_wrapper(self.extract_info)(
3353                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3354
3355         return self._download_retcode
3356
3357     def download_with_info_file(self, info_filename):
3358         with contextlib.closing(fileinput.FileInput(
3359                 [info_filename], mode='r',
3360                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3361             # FileInput doesn't have a read method, we can't call json.load
3362             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3363         try:
3364             self.__download_wrapper(self.process_ie_result)(info, download=True)
3365         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3366             if not isinstance(e, EntryNotInPlaylist):
3367                 self.to_stderr('\r')
3368             webpage_url = info.get('webpage_url')
3369             if webpage_url is not None:
3370                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3371                 return self.download([webpage_url])
3372             else:
3373                 raise
3374         return self._download_retcode
3375
3376     @staticmethod
3377     def sanitize_info(info_dict, remove_private_keys=False):
3378         ''' Sanitize the infodict for converting to json '''
3379         if info_dict is None:
3380             return info_dict
3381         info_dict.setdefault('epoch', int(time.time()))
3382         info_dict.setdefault('_type', 'video')
3383         info_dict.setdefault('_version', {
3384             'version': __version__,
3385             'current_git_head': current_git_head(),
3386             'release_git_head': RELEASE_GIT_HEAD,
3387             'repository': REPOSITORY,
3388         })
3389
3390         if remove_private_keys:
3391             reject = lambda k, v: v is None or k.startswith('__') or k in {
3392                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3393                 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
3394             }
3395         else:
3396             reject = lambda k, v: False
3397
3398         def filter_fn(obj):
3399             if isinstance(obj, dict):
3400                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3401             elif isinstance(obj, (list, tuple, set, LazyList)):
3402                 return list(map(filter_fn, obj))
3403             elif obj is None or isinstance(obj, (str, int, float, bool)):
3404                 return obj
3405             else:
3406                 return repr(obj)
3407
3408         return filter_fn(info_dict)
3409
3410     @staticmethod
3411     def filter_requested_info(info_dict, actually_filter=True):
3412         ''' Alias of sanitize_info for backward compatibility '''
3413         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3414
3415     def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3416         for filename in set(filter(None, files_to_delete)):
3417             if msg:
3418                 self.to_screen(msg % filename)
3419             try:
3420                 os.remove(filename)
3421             except OSError:
3422                 self.report_warning(f'Unable to delete file {filename}')
3423             if filename in info.get('__files_to_move', []):  # NB: Delete even if None
3424                 del info['__files_to_move'][filename]
3425
3426     @staticmethod
3427     def post_extract(info_dict):
3428         def actual_post_extract(info_dict):
3429             if info_dict.get('_type') in ('playlist', 'multi_video'):
3430                 for video_dict in info_dict.get('entries', {}):
3431                     actual_post_extract(video_dict or {})
3432                 return
3433
3434             post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3435             info_dict.update(post_extractor())
3436
3437         actual_post_extract(info_dict or {})
3438
3439     def run_pp(self, pp, infodict):
3440         files_to_delete = []
3441         if '__files_to_move' not in infodict:
3442             infodict['__files_to_move'] = {}
3443         try:
3444             files_to_delete, infodict = pp.run(infodict)
3445         except PostProcessingError as e:
3446             # Must be True and not 'only_download'
3447             if self.params.get('ignoreerrors') is True:
3448                 self.report_error(e)
3449                 return infodict
3450             raise
3451
3452         if not files_to_delete:
3453             return infodict
3454         if self.params.get('keepvideo', False):
3455             for f in files_to_delete:
3456                 infodict['__files_to_move'].setdefault(f, '')
3457         else:
3458             self._delete_downloaded_files(
3459                 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3460         return infodict
3461
3462     def run_all_pps(self, key, info, *, additional_pps=None):
3463         self._forceprint(key, info)
3464         for pp in (additional_pps or []) + self._pps[key]:
3465             info = self.run_pp(pp, info)
3466         return info
3467
3468     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3469         info = dict(ie_info)
3470         info['__files_to_move'] = files_to_move or {}
3471         try:
3472             info = self.run_all_pps(key, info)
3473         except PostProcessingError as err:
3474             msg = f'Preprocessing: {err}'
3475             info.setdefault('__pending_error', msg)
3476             self.report_error(msg, is_error=False)
3477         return info, info.pop('__files_to_move', None)
3478
3479     def post_process(self, filename, info, files_to_move=None):
3480         """Run all the postprocessors on the given file."""
3481         info['filepath'] = filename
3482         info['__files_to_move'] = files_to_move or {}
3483         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3484         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3485         del info['__files_to_move']
3486         return self.run_all_pps('after_move', info)
3487
3488     def _make_archive_id(self, info_dict):
3489         video_id = info_dict.get('id')
3490         if not video_id:
3491             return
3492         # Future-proof against any change in case
3493         # and backwards compatibility with prior versions
3494         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3495         if extractor is None:
3496             url = str_or_none(info_dict.get('url'))
3497             if not url:
3498                 return
3499             # Try to find matching extractor for the URL and take its ie_key
3500             for ie_key, ie in self._ies.items():
3501                 if ie.suitable(url):
3502                     extractor = ie_key
3503                     break
3504             else:
3505                 return
3506         return make_archive_id(extractor, video_id)
3507
3508     def in_download_archive(self, info_dict):
3509         if not self.archive:
3510             return False
3511
3512         vid_ids = [self._make_archive_id(info_dict)]
3513         vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3514         return any(id_ in self.archive for id_ in vid_ids)
3515
3516     def record_download_archive(self, info_dict):
3517         fn = self.params.get('download_archive')
3518         if fn is None:
3519             return
3520         vid_id = self._make_archive_id(info_dict)
3521         assert vid_id
3522
3523         self.write_debug(f'Adding to archive: {vid_id}')
3524         if is_path_like(fn):
3525             with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3526                 archive_file.write(vid_id + '\n')
3527         self.archive.add(vid_id)
3528
3529     @staticmethod
3530     def format_resolution(format, default='unknown'):
3531         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3532             return 'audio only'
3533         if format.get('resolution') is not None:
3534             return format['resolution']
3535         if format.get('width') and format.get('height'):
3536             return '%dx%d' % (format['width'], format['height'])
3537         elif format.get('height'):
3538             return '%sp' % format['height']
3539         elif format.get('width'):
3540             return '%dx?' % format['width']
3541         return default
3542
3543     def _list_format_headers(self, *headers):
3544         if self.params.get('listformats_table', True) is not False:
3545             return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3546         return headers
3547
3548     def _format_note(self, fdict):
3549         res = ''
3550         if fdict.get('ext') in ['f4f', 'f4m']:
3551             res += '(unsupported)'
3552         if fdict.get('language'):
3553             if res:
3554                 res += ' '
3555             res += '[%s]' % fdict['language']
3556         if fdict.get('format_note') is not None:
3557             if res:
3558                 res += ' '
3559             res += fdict['format_note']
3560         if fdict.get('tbr') is not None:
3561             if res:
3562                 res += ', '
3563             res += '%4dk' % fdict['tbr']
3564         if fdict.get('container') is not None:
3565             if res:
3566                 res += ', '
3567             res += '%s container' % fdict['container']
3568         if (fdict.get('vcodec') is not None
3569                 and fdict.get('vcodec') != 'none'):
3570             if res:
3571                 res += ', '
3572             res += fdict['vcodec']
3573             if fdict.get('vbr') is not None:
3574                 res += '@'
3575         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3576             res += 'video@'
3577         if fdict.get('vbr') is not None:
3578             res += '%4dk' % fdict['vbr']
3579         if fdict.get('fps') is not None:
3580             if res:
3581                 res += ', '
3582             res += '%sfps' % fdict['fps']
3583         if fdict.get('acodec') is not None:
3584             if res:
3585                 res += ', '
3586             if fdict['acodec'] == 'none':
3587                 res += 'video only'
3588             else:
3589                 res += '%-5s' % fdict['acodec']
3590         elif fdict.get('abr') is not None:
3591             if res:
3592                 res += ', '
3593             res += 'audio'
3594         if fdict.get('abr') is not None:
3595             res += '@%3dk' % fdict['abr']
3596         if fdict.get('asr') is not None:
3597             res += ' (%5dHz)' % fdict['asr']
3598         if fdict.get('filesize') is not None:
3599             if res:
3600                 res += ', '
3601             res += format_bytes(fdict['filesize'])
3602         elif fdict.get('filesize_approx') is not None:
3603             if res:
3604                 res += ', '
3605             res += '~' + format_bytes(fdict['filesize_approx'])
3606         return res
3607
3608     def _get_formats(self, info_dict):
3609         if info_dict.get('formats') is None:
3610             if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3611                 return [info_dict]
3612             return []
3613         return info_dict['formats']
3614
3615     def render_formats_table(self, info_dict):
3616         formats = self._get_formats(info_dict)
3617         if not formats:
3618             return
3619         if not self.params.get('listformats_table', True) is not False:
3620             table = [
3621                 [
3622                     format_field(f, 'format_id'),
3623                     format_field(f, 'ext'),
3624                     self.format_resolution(f),
3625                     self._format_note(f)
3626                 ] for f in formats if (f.get('preference') or 0) >= -1000]
3627             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3628
3629         def simplified_codec(f, field):
3630             assert field in ('acodec', 'vcodec')
3631             codec = f.get(field, 'unknown')
3632             if not codec:
3633                 return 'unknown'
3634             elif codec != 'none':
3635                 return '.'.join(codec.split('.')[:4])
3636
3637             if field == 'vcodec' and f.get('acodec') == 'none':
3638                 return 'images'
3639             elif field == 'acodec' and f.get('vcodec') == 'none':
3640                 return ''
3641             return self._format_out('audio only' if field == 'vcodec' else 'video only',
3642                                     self.Styles.SUPPRESS)
3643
3644         delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3645         table = [
3646             [
3647                 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3648                 format_field(f, 'ext'),
3649                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3650                 format_field(f, 'fps', '\t%d', func=round),
3651                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3652                 format_field(f, 'audio_channels', '\t%s'),
3653                 delim,
3654                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3655                 format_field(f, 'tbr', '\t%dk', func=round),
3656                 shorten_protocol_name(f.get('protocol', '')),
3657                 delim,
3658                 simplified_codec(f, 'vcodec'),
3659                 format_field(f, 'vbr', '\t%dk', func=round),
3660                 simplified_codec(f, 'acodec'),
3661                 format_field(f, 'abr', '\t%dk', func=round),
3662                 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3663                 join_nonempty(
3664                     self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3665                     format_field(f, 'language', '[%s]'),
3666                     join_nonempty(format_field(f, 'format_note'),
3667                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3668                                   delim=', '),
3669                     delim=' '),
3670             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3671         header_line = self._list_format_headers(
3672             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3673             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3674
3675         return render_table(
3676             header_line, table, hide_empty=True,
3677             delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3678
3679     def render_thumbnails_table(self, info_dict):
3680         thumbnails = list(info_dict.get('thumbnails') or [])
3681         if not thumbnails:
3682             return None
3683         return render_table(
3684             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3685             [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3686
3687     def render_subtitles_table(self, video_id, subtitles):
3688         def _row(lang, formats):
3689             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3690             if len(set(names)) == 1:
3691                 names = [] if names[0] == 'unknown' else names[:1]
3692             return [lang, ', '.join(names), ', '.join(exts)]
3693
3694         if not subtitles:
3695             return None
3696         return render_table(
3697             self._list_format_headers('Language', 'Name', 'Formats'),
3698             [_row(lang, formats) for lang, formats in subtitles.items()],
3699             hide_empty=True)
3700
3701     def __list_table(self, video_id, name, func, *args):
3702         table = func(*args)
3703         if not table:
3704             self.to_screen(f'{video_id} has no {name}')
3705             return
3706         self.to_screen(f'[info] Available {name} for {video_id}:')
3707         self.to_stdout(table)
3708
3709     def list_formats(self, info_dict):
3710         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3711
3712     def list_thumbnails(self, info_dict):
3713         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3714
3715     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3716         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3717
3718     def urlopen(self, req):
3719         """ Start an HTTP download """
3720         if isinstance(req, str):
3721             req = sanitized_Request(req)
3722         return self._opener.open(req, timeout=self._socket_timeout)
3723
3724     def print_debug_header(self):
3725         if not self.params.get('verbose'):
3726             return
3727
3728         from . import _IN_CLI  # Must be delayed import
3729
3730         # These imports can be slow. So import them only as needed
3731         from .extractor.extractors import _LAZY_LOADER
3732         from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
3733
3734         def get_encoding(stream):
3735             ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3736             if not supports_terminal_sequences(stream):
3737                 from .utils import WINDOWS_VT_MODE  # Must be imported locally
3738                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3739             return ret
3740
3741         encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
3742             locale.getpreferredencoding(),
3743             sys.getfilesystemencoding(),
3744             self.get_encoding(),
3745             ', '.join(
3746                 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3747                 if stream is not None and key != 'console')
3748         )
3749
3750         logger = self.params.get('logger')
3751         if logger:
3752             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3753             write_debug(encoding_str)
3754         else:
3755             write_string(f'[debug] {encoding_str}\n', encoding=None)
3756             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3757
3758         source = detect_variant()
3759         if VARIANT not in (None, 'pip'):
3760             source += '*'
3761         write_debug(join_nonempty(
3762             f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
3763             __version__,
3764             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3765             '' if source == 'unknown' else f'({source})',
3766             '' if _IN_CLI else 'API',
3767             delim=' '))
3768
3769         if not _IN_CLI:
3770             write_debug(f'params: {self.params}')
3771
3772         if not _LAZY_LOADER:
3773             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3774                 write_debug('Lazy loading extractors is forcibly disabled')
3775             else:
3776                 write_debug('Lazy loading extractors is disabled')
3777         if plugin_extractors or plugin_postprocessors:
3778             write_debug('Plugins: %s' % [
3779                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3780                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3781         if self.params['compat_opts']:
3782             write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
3783
3784         if current_git_head():
3785             write_debug(f'Git HEAD: {current_git_head()}')
3786         write_debug(system_identifier())
3787
3788         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3789         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3790         if ffmpeg_features:
3791             exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3792
3793         exe_versions['rtmpdump'] = rtmpdump_version()
3794         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3795         exe_str = ', '.join(
3796             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3797         ) or 'none'
3798         write_debug('exe versions: %s' % exe_str)
3799
3800         from .compat.compat_utils import get_package_info
3801         from .dependencies import available_dependencies
3802
3803         write_debug('Optional libraries: %s' % (', '.join(sorted({
3804             join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
3805         })) or 'none'))
3806
3807         self._setup_opener()
3808         proxy_map = {}
3809         for handler in self._opener.handlers:
3810             if hasattr(handler, 'proxies'):
3811                 proxy_map.update(handler.proxies)
3812         write_debug(f'Proxy map: {proxy_map}')
3813
3814         # Not implemented
3815         if False and self.params.get('call_home'):
3816             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
3817             write_debug('Public IP address: %s' % ipaddr)
3818             latest_version = self.urlopen(
3819                 'https://yt-dl.org/latest/version').read().decode()
3820             if version_tuple(latest_version) > version_tuple(__version__):
3821                 self.report_warning(
3822                     'You are using an outdated version (newest version: %s)! '
3823                     'See https://yt-dl.org/update if you need help updating.' %
3824                     latest_version)
3825
3826     def _setup_opener(self):
3827         if hasattr(self, '_opener'):
3828             return
3829         timeout_val = self.params.get('socket_timeout')
3830         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3831
3832         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3833         opts_cookiefile = self.params.get('cookiefile')
3834         opts_proxy = self.params.get('proxy')
3835
3836         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3837
3838         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3839         if opts_proxy is not None:
3840             if opts_proxy == '':
3841                 proxies = {}
3842             else:
3843                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3844         else:
3845             proxies = urllib.request.getproxies()
3846             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3847             if 'http' in proxies and 'https' not in proxies:
3848                 proxies['https'] = proxies['http']
3849         proxy_handler = PerRequestProxyHandler(proxies)
3850
3851         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3852         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3853         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3854         redirect_handler = YoutubeDLRedirectHandler()
3855         data_handler = urllib.request.DataHandler()
3856
3857         # When passing our own FileHandler instance, build_opener won't add the
3858         # default FileHandler and allows us to disable the file protocol, which
3859         # can be used for malicious purposes (see
3860         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3861         file_handler = urllib.request.FileHandler()
3862
3863         def file_open(*args, **kwargs):
3864             raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3865         file_handler.file_open = file_open
3866
3867         opener = urllib.request.build_opener(
3868             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3869
3870         # Delete the default user-agent header, which would otherwise apply in
3871         # cases where our custom HTTP handler doesn't come into play
3872         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3873         opener.addheaders = []
3874         self._opener = opener
3875
3876     def encode(self, s):
3877         if isinstance(s, bytes):
3878             return s  # Already encoded
3879
3880         try:
3881             return s.encode(self.get_encoding())
3882         except UnicodeEncodeError as err:
3883             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3884             raise
3885
3886     def get_encoding(self):
3887         encoding = self.params.get('encoding')
3888         if encoding is None:
3889             encoding = preferredencoding()
3890         return encoding
3891
3892     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3893         ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
3894         if overwrite is None:
3895             overwrite = self.params.get('overwrites', True)
3896         if not self.params.get('writeinfojson'):
3897             return False
3898         elif not infofn:
3899             self.write_debug(f'Skipping writing {label} infojson')
3900             return False
3901         elif not self._ensure_dir_exists(infofn):
3902             return None
3903         elif not overwrite and os.path.exists(infofn):
3904             self.to_screen(f'[info] {label.title()} metadata is already present')
3905             return 'exists'
3906
3907         self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3908         try:
3909             write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3910             return True
3911         except OSError:
3912             self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3913             return None
3914
3915     def _write_description(self, label, ie_result, descfn):
3916         ''' Write description and returns True = written, False = skip, None = error '''
3917         if not self.params.get('writedescription'):
3918             return False
3919         elif not descfn:
3920             self.write_debug(f'Skipping writing {label} description')
3921             return False
3922         elif not self._ensure_dir_exists(descfn):
3923             return None
3924         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3925             self.to_screen(f'[info] {label.title()} description is already present')
3926         elif ie_result.get('description') is None:
3927             self.report_warning(f'There\'s no {label} description to write')
3928             return False
3929         else:
3930             try:
3931                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3932                 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3933                     descfile.write(ie_result['description'])
3934             except OSError:
3935                 self.report_error(f'Cannot write {label} description file {descfn}')
3936                 return None
3937         return True
3938
3939     def _write_subtitles(self, info_dict, filename):
3940         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3941         ret = []
3942         subtitles = info_dict.get('requested_subtitles')
3943         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3944             # subtitles download errors are already managed as troubles in relevant IE
3945             # that way it will silently go on when used with unsupporting IE
3946             return ret
3947
3948         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3949         if not sub_filename_base:
3950             self.to_screen('[info] Skipping writing video subtitles')
3951             return ret
3952         for sub_lang, sub_info in subtitles.items():
3953             sub_format = sub_info['ext']
3954             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3955             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3956             existing_sub = self.existing_file((sub_filename_final, sub_filename))
3957             if existing_sub:
3958                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3959                 sub_info['filepath'] = existing_sub
3960                 ret.append((existing_sub, sub_filename_final))
3961                 continue
3962
3963             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3964             if sub_info.get('data') is not None:
3965                 try:
3966                     # Use newline='' to prevent conversion of newline characters
3967                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3968                     with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3969                         subfile.write(sub_info['data'])
3970                     sub_info['filepath'] = sub_filename
3971                     ret.append((sub_filename, sub_filename_final))
3972                     continue
3973                 except OSError:
3974                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3975                     return None
3976
3977             try:
3978                 sub_copy = sub_info.copy()
3979                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3980                 self.dl(sub_filename, sub_copy, subtitle=True)
3981                 sub_info['filepath'] = sub_filename
3982                 ret.append((sub_filename, sub_filename_final))
3983             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3984                 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
3985                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3986                     if not self.params.get('ignoreerrors'):
3987                         self.report_error(msg)
3988                     raise DownloadError(msg)
3989                 self.report_warning(msg)
3990         return ret
3991
3992     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3993         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3994         write_all = self.params.get('write_all_thumbnails', False)
3995         thumbnails, ret = [], []
3996         if write_all or self.params.get('writethumbnail', False):
3997             thumbnails = info_dict.get('thumbnails') or []
3998         multiple = write_all and len(thumbnails) > 1
3999
4000         if thumb_filename_base is None:
4001             thumb_filename_base = filename
4002         if thumbnails and not thumb_filename_base:
4003             self.write_debug(f'Skipping writing {label} thumbnail')
4004             return ret
4005
4006         for idx, t in list(enumerate(thumbnails))[::-1]:
4007             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
4008             thumb_display_id = f'{label} thumbnail {t["id"]}'
4009             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4010             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
4011
4012             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4013             if existing_thumb:
4014                 self.to_screen('[info] %s is already present' % (
4015                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
4016                 t['filepath'] = existing_thumb
4017                 ret.append((existing_thumb, thumb_filename_final))
4018             else:
4019                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
4020                 try:
4021                     uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
4022                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
4023                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
4024                         shutil.copyfileobj(uf, thumbf)
4025                     ret.append((thumb_filename, thumb_filename_final))
4026                     t['filepath'] = thumb_filename
4027                 except network_exceptions as err:
4028                     thumbnails.pop(idx)
4029                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
4030             if ret and not write_all:
4031                 break
4032         return ret