yt_dlp/YoutubeDL.py

   1 import collections
   2 import contextlib
   3 import datetime
   4 import errno
   5 import fileinput
   6 import functools
   7 import io
   8 import itertools
   9 import json
  10 import locale
  11 import operator
  12 import os
  13 import random
  14 import re
  15 import shutil
  16 import subprocess
  17 import sys
  18 import tempfile
  19 import time
  20 import tokenize
  21 import traceback
  22 import unicodedata
  23 import urllib.request
  24 from string import ascii_letters
  25
  26 from .cache import Cache
  27 from .compat import compat_os_name, compat_shlex_quote
  28 from .cookies import load_cookies
  29 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
  30 from .downloader.rtmp import rtmpdump_version
  31 from .extractor import gen_extractor_classes, get_info_extractor
  32 from .extractor.openload import PhantomJSwrapper
  33 from .minicurses import format_text
  34 from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
  35 from .postprocessor import (
  36     EmbedThumbnailPP,
  37     FFmpegFixupDuplicateMoovPP,
  38     FFmpegFixupDurationPP,
  39     FFmpegFixupM3u8PP,
  40     FFmpegFixupM4aPP,
  41     FFmpegFixupStretchedPP,
  42     FFmpegFixupTimestampPP,
  43     FFmpegMergerPP,
  44     FFmpegPostProcessor,
  45     FFmpegVideoConvertorPP,
  46     MoveFilesAfterDownloadPP,
  47     get_postprocessor,
  48 )
  49 from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
  50 from .update import detect_variant
  51 from .utils import (
  52     DEFAULT_OUTTMPL,
  53     IDENTITY,
  54     LINK_TEMPLATES,
  55     NO_DEFAULT,
  56     NUMBER_RE,
  57     OUTTMPL_TYPES,
  58     POSTPROCESS_WHEN,
  59     STR_FORMAT_RE_TMPL,
  60     STR_FORMAT_TYPES,
  61     ContentTooShortError,
  62     DateRange,
  63     DownloadCancelled,
  64     DownloadError,
  65     EntryNotInPlaylist,
  66     ExistingVideoReached,
  67     ExtractorError,
  68     GeoRestrictedError,
  69     HEADRequest,
  70     ISO3166Utils,
  71     LazyList,
  72     MaxDownloadsReached,
  73     Namespace,
  74     PagedList,
  75     PerRequestProxyHandler,
  76     PlaylistEntries,
  77     Popen,
  78     PostProcessingError,
  79     ReExtractInfo,
  80     RejectedVideoReached,
  81     SameFileError,
  82     UnavailableVideoError,
  83     YoutubeDLCookieProcessor,
  84     YoutubeDLHandler,
  85     YoutubeDLRedirectHandler,
  86     age_restricted,
  87     args_to_str,
  88     bug_reports_message,
  89     date_from_str,
  90     determine_ext,
  91     determine_protocol,
  92     encode_compat_str,
  93     encodeFilename,
  94     error_to_compat_str,
  95     escapeHTML,
  96     expand_path,
  97     filter_dict,
  98     float_or_none,
  99     format_bytes,
 100     format_decimal_suffix,
 101     format_field,
 102     formatSeconds,
 103     get_domain,
 104     int_or_none,
 105     iri_to_uri,
 106     join_nonempty,
 107     locked_file,
 108     make_dir,
 109     make_HTTPS_handler,
 110     merge_headers,
 111     network_exceptions,
 112     number_of_digits,
 113     orderedSet,
 114     parse_filesize,
 115     preferredencoding,
 116     prepend_extension,
 117     register_socks_protocols,
 118     remove_terminal_sequences,
 119     render_table,
 120     replace_extension,
 121     sanitize_filename,
 122     sanitize_path,
 123     sanitize_url,
 124     sanitized_Request,
 125     std_headers,
 126     str_or_none,
 127     strftime_or_none,
 128     subtitles_filename,
 129     supports_terminal_sequences,
 130     system_identifier,
 131     timetuple_from_msec,
 132     to_high_limit_path,
 133     traverse_obj,
 134     try_get,
 135     url_basename,
 136     variadic,
 137     version_tuple,
 138     windows_enable_vt_mode,
 139     write_json_file,
 140     write_string,
 141 )
 142 from .version import RELEASE_GIT_HEAD, __version__
 143
 144 if compat_os_name == 'nt':
 145     import ctypes
 146
 147
 148 class YoutubeDL:
 149     """YoutubeDL class.
 150
 151     YoutubeDL objects are the ones responsible for downloading the
 152     actual video file and writing it to disk if the user has requested
 153     it, among some other tasks. In most cases there should be one per
 154     program. As, given a video URL, the downloader doesn't know how to
 155     extract all the needed information, task that InfoExtractors do, it
 156     has to pass the URL to one of them.
 157
 158     For this, YoutubeDL objects have a method that allows
 159     InfoExtractors to be registered in a given order. When it is passed
 160     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 161     finds that reports being able to handle it. The InfoExtractor extracts
 162     all the information about the video or videos the URL refers to, and
 163     YoutubeDL processes the extracted information, possibly using a File
 164     Downloader to download the video.
 165
 166     YoutubeDL objects accept a lot of parameters. In order not to saturate
 167     the object constructor with arguments, it receives a dictionary of
 168     options instead. These options are available through the params
 169     attribute for the InfoExtractors to use. The YoutubeDL also
 170     registers itself as the downloader in charge for the InfoExtractors
 171     that are added to it, so this is a "mutual registration".
 172
 173     Available options:
 174
 175     username:          Username for authentication purposes.
 176     password:          Password for authentication purposes.
 177     videopassword:     Password for accessing a video.
 178     ap_mso:            Adobe Pass multiple-system operator identifier.
 179     ap_username:       Multiple-system operator account username.
 180     ap_password:       Multiple-system operator account password.
 181     usenetrc:          Use netrc for authentication instead.
 182     verbose:           Print additional info to stdout.
 183     quiet:             Do not print messages to stdout.
 184     no_warnings:       Do not print out anything for warnings.
 185     forceprint:        A dict with keys WHEN mapped to a list of templates to
 186                        print to stdout. The allowed keys are video or any of the
 187                        items in utils.POSTPROCESS_WHEN.
 188                        For compatibility, a single list is also accepted
 189     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 190                        a list of tuples with (template, filename)
 191     forcejson:         Force printing info_dict as JSON.
 192     dump_single_json:  Force printing the info_dict of the whole playlist
 193                        (or video) as a single JSON line.
 194     force_write_download_archive: Force writing download archive regardless
 195                        of 'skip_download' or 'simulate'.
 196     simulate:          Do not download the video files. If unset (or None),
 197                        simulate only if listsubtitles, listformats or list_thumbnails is used
 198     format:            Video format code. see "FORMAT SELECTION" for more details.
 199                        You can also pass a function. The function takes 'ctx' as
 200                        argument and returns the formats to download.
 201                        See "build_format_selector" for an implementation
 202     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 203     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 204                        extracting metadata even if the video is not actually
 205                        available for download (experimental)
 206     format_sort:       A list of fields by which to sort the video formats.
 207                        See "Sorting Formats" for more details.
 208     format_sort_force: Force the given format_sort. see "Sorting Formats"
 209                        for more details.
 210     prefer_free_formats: Whether to prefer video formats with free containers
 211                        over non-free ones of same quality.
 212     allow_multiple_video_streams:   Allow multiple video streams to be merged
 213                        into a single file
 214     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 215                        into a single file
 216     check_formats:     Whether to test if the formats are downloadable.
 217                        Can be True (check all), False (check none),
 218                        'selected' (check selected formats),
 219                        or None (check only if requested by extractor)
 220     paths:             Dictionary of output paths. The allowed keys are 'home'
 221                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 222     outtmpl:           Dictionary of templates for output names. Allowed keys
 223                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 224                        For compatibility with youtube-dl, a single string can also be used
 225     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 226     restrictfilenames: Do not allow "&" and spaces in file names
 227     trim_file_name:    Limit length of filename (extension excluded)
 228     windowsfilenames:  Force the filenames to be windows compatible
 229     ignoreerrors:      Do not stop on download/postprocessing errors.
 230                        Can be 'only_download' to ignore only download errors.
 231                        Default is 'only_download' for CLI, but False for API
 232     skip_playlist_after_errors: Number of allowed failures until the rest of
 233                        the playlist is skipped
 234     force_generic_extractor: Force downloader to use the generic extractor
 235     overwrites:        Overwrite all video and metadata files if True,
 236                        overwrite only non-video files if None
 237                        and don't overwrite any file if False
 238                        For compatibility with youtube-dl,
 239                        "nooverwrites" may also be used instead
 240     playlist_items:    Specific indices of playlist to download.
 241     playlistrandom:    Download playlist items in random order.
 242     lazy_playlist:     Process playlist entries as they are received.
 243     matchtitle:        Download only matching titles.
 244     rejecttitle:       Reject downloads for matching titles.
 245     logger:            Log messages to a logging.Logger instance.
 246     logtostderr:       Log messages to stderr instead of stdout.
 247     consoletitle:      Display progress in console window's titlebar.
 248     writedescription:  Write the video description to a .description file
 249     writeinfojson:     Write the video metadata to a .info.json file
 250     clean_infojson:    Remove private fields from the infojson
 251     getcomments:       Extract video comments. This will not be written to disk
 252                        unless writeinfojson is also given
 253     writeannotations:  Write the video annotations to a .annotations.xml file
 254     writethumbnail:    Write the thumbnail image to a file
 255     allow_playlist_files: Whether to write playlists' description, infojson etc
 256                        also to disk when using the 'write*' options
 257     write_all_thumbnails:  Write all thumbnail formats to files
 258     writelink:         Write an internet shortcut file, depending on the
 259                        current platform (.url/.webloc/.desktop)
 260     writeurllink:      Write a Windows internet shortcut file (.url)
 261     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 262     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 263     writesubtitles:    Write the video subtitles to a file
 264     writeautomaticsub: Write the automatically generated subtitles to a file
 265     listsubtitles:     Lists all available subtitles for the video
 266     subtitlesformat:   The format code for subtitles
 267     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 268                        The list may contain "all" to refer to all the available
 269                        subtitles. The language can be prefixed with a "-" to
 270                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 271     keepvideo:         Keep the video file after post-processing
 272     daterange:         A DateRange object, download only if the upload_date is in the range.
 273     skip_download:     Skip the actual download of the video file
 274     cachedir:          Location of the cache files in the filesystem.
 275                        False to disable filesystem cache.
 276     noplaylist:        Download single video instead of a playlist if in doubt.
 277     age_limit:         An integer representing the user's age in years.
 278                        Unsuitable videos for the given age are skipped.
 279     min_views:         An integer representing the minimum view count the video
 280                        must have in order to not be skipped.
 281                        Videos without view count information are always
 282                        downloaded. None for no limit.
 283     max_views:         An integer representing the maximum view count.
 284                        Videos that are more popular than that are not
 285                        downloaded.
 286                        Videos without view count information are always
 287                        downloaded. None for no limit.
 288     download_archive:  File name of a file where all downloads are recorded.
 289                        Videos already present in the file are not downloaded
 290                        again.
 291     break_on_existing: Stop the download process after attempting to download a
 292                        file that is in the archive.
 293     break_on_reject:   Stop the download process when encountering a video that
 294                        has been filtered out.
 295     break_per_url:     Whether break_on_reject and break_on_existing
 296                        should act on each input URL as opposed to for the entire queue
 297     cookiefile:        File name or text stream from where cookies should be read and dumped to
 298     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 299                        name/path from where cookies are loaded, and the name of the
 300                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 301     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 302                        support RFC 5746 secure renegotiation
 303     nocheckcertificate:  Do not verify SSL certificates
 304     client_certificate:  Path to client certificate file in PEM format. May include the private key
 305     client_certificate_key:  Path to private key file for client certificate
 306     client_certificate_password:  Password for client certificate private key, if encrypted.
 307                         If not provided and the key is encrypted, yt-dlp will ask interactively
 308     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 309                        (Only supported by some extractors)
 310     http_headers:      A dictionary of custom headers to be used for all requests
 311     proxy:             URL of the proxy server to use
 312     geo_verification_proxy:  URL of the proxy to use for IP address verification
 313                        on geo-restricted sites.
 314     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 315     bidi_workaround:   Work around buggy terminals without bidirectional text
 316                        support, using fribidi
 317     debug_printtraffic:Print out sent and received HTTP traffic
 318     default_search:    Prepend this string if an input url is not valid.
 319                        'auto' for elaborate guessing
 320     encoding:          Use this encoding instead of the system-specified.
 321     extract_flat:      Whether to resolve and process url_results further
 322                        * False:     Always process (default)
 323                        * True:      Never process
 324                        * 'in_playlist': Do not process inside playlist/multi_video
 325                        * 'discard': Always process, but don't return the result
 326                                     from inside playlist/multi_video
 327                        * 'discard_in_playlist': Same as "discard", but only for
 328                                     playlists (not multi_video)
 329     wait_for_video:    If given, wait for scheduled streams to become available.
 330                        The value should be a tuple containing the range
 331                        (min_secs, max_secs) to wait between retries
 332     postprocessors:    A list of dictionaries, each with an entry
 333                        * key:  The name of the postprocessor. See
 334                                yt_dlp/postprocessor/__init__.py for a list.
 335                        * when: When to run the postprocessor. Allowed values are
 336                                the entries of utils.POSTPROCESS_WHEN
 337                                Assumed to be 'post_process' if not given
 338     progress_hooks:    A list of functions that get called on download
 339                        progress, with a dictionary with the entries
 340                        * status: One of "downloading", "error", or "finished".
 341                                  Check this first and ignore unknown values.
 342                        * info_dict: The extracted info_dict
 343
 344                        If status is one of "downloading", or "finished", the
 345                        following properties may also be present:
 346                        * filename: The final filename (always present)
 347                        * tmpfilename: The filename we're currently writing to
 348                        * downloaded_bytes: Bytes on disk
 349                        * total_bytes: Size of the whole file, None if unknown
 350                        * total_bytes_estimate: Guess of the eventual file size,
 351                                                None if unavailable.
 352                        * elapsed: The number of seconds since download started.
 353                        * eta: The estimated time in seconds, None if unknown
 354                        * speed: The download speed in bytes/second, None if
 355                                 unknown
 356                        * fragment_index: The counter of the currently
 357                                          downloaded video fragment.
 358                        * fragment_count: The number of fragments (= individual
 359                                          files that will be merged)
 360
 361                        Progress hooks are guaranteed to be called at least once
 362                        (with status "finished") if the download is successful.
 363     postprocessor_hooks:  A list of functions that get called on postprocessing
 364                        progress, with a dictionary with the entries
 365                        * status: One of "started", "processing", or "finished".
 366                                  Check this first and ignore unknown values.
 367                        * postprocessor: Name of the postprocessor
 368                        * info_dict: The extracted info_dict
 369
 370                        Postprocessor hooks are guaranteed to be called at least twice
 371                        (with status "started" and "finished") if the processing is successful.
 372     merge_output_format: Extension to use when merging formats.
 373     final_ext:         Expected final extension; used to detect when the file was
 374                        already downloaded and converted
 375     fixup:             Automatically correct known faults of the file.
 376                        One of:
 377                        - "never": do nothing
 378                        - "warn": only emit a warning
 379                        - "detect_or_warn": check whether we can do anything
 380                                            about it, warn otherwise (default)
 381     source_address:    Client-side IP address to bind to.
 382     sleep_interval_requests: Number of seconds to sleep between requests
 383                        during extraction
 384     sleep_interval:    Number of seconds to sleep before each download when
 385                        used alone or a lower bound of a range for randomized
 386                        sleep before each download (minimum possible number
 387                        of seconds to sleep) when used along with
 388                        max_sleep_interval.
 389     max_sleep_interval:Upper bound of a range for randomized sleep before each
 390                        download (maximum possible number of seconds to sleep).
 391                        Must only be used along with sleep_interval.
 392                        Actual sleep time will be a random float from range
 393                        [sleep_interval; max_sleep_interval].
 394     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 395     listformats:       Print an overview of available video formats and exit.
 396     list_thumbnails:   Print a table of all thumbnails and exit.
 397     match_filter:      A function that gets called for every video with the signature
 398                        (info_dict, *, incomplete: bool) -> Optional[str]
 399                        For backward compatibility with youtube-dl, the signature
 400                        (info_dict) -> Optional[str] is also allowed.
 401                        - If it returns a message, the video is ignored.
 402                        - If it returns None, the video is downloaded.
 403                        - If it returns utils.NO_DEFAULT, the user is interactively
 404                          asked whether to download the video.
 405                        match_filter_func in utils.py is one example for this.
 406     no_color:          Do not emit color codes in output.
 407     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 408                        HTTP header
 409     geo_bypass_country:
 410                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 411                        explicit geographic restriction bypassing via faking
 412                        X-Forwarded-For HTTP header
 413     geo_bypass_ip_block:
 414                        IP range in CIDR notation that will be used similarly to
 415                        geo_bypass_country
 416     external_downloader: A dictionary of protocol keys and the executable of the
 417                        external downloader to use for it. The allowed protocols
 418                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 419                        Set the value to 'native' to use the native downloader
 420     compat_opts:       Compatibility options. See "Differences in default behavior".
 421                        The following options do not work when used through the API:
 422                        filename, abort-on-error, multistreams, no-live-chat, format-sort
 423                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 424                        Refer to __init__.py for their implementation
 425     progress_template: Dictionary of templates for progress outputs.
 426                        Allowed keys are 'download', 'postprocess',
 427                        'download-title' (console title) and 'postprocess-title'.
 428                        The template is mapped on a dictionary with keys 'progress' and 'info'
 429     retry_sleep_functions: Dictionary of functions that take the number of attempts
 430                        as argument and return the time to sleep in seconds.
 431                        Allowed keys are 'http', 'fragment', 'file_access'
 432     download_ranges:   A callback function that gets called for every video with
 433                        the signature (info_dict, ydl) -> Iterable[Section].
 434                        Only the returned sections will be downloaded.
 435                        Each Section is a dict with the following keys:
 436                        * start_time: Start time of the section in seconds
 437                        * end_time: End time of the section in seconds
 438                        * title: Section title (Optional)
 439                        * index: Section number (Optional)
 440     force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
 441     noprogress:        Do not print the progress bar
 442
 443     The following parameters are not used by YoutubeDL itself, they are used by
 444     the downloader (see yt_dlp/downloader/common.py):
 445     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 446     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 447     continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 448     external_downloader_args, concurrent_fragment_downloads.
 449
 450     The following options are used by the post processors:
 451     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 452                        to the binary or its containing directory.
 453     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 454                        and a list of additional command-line arguments for the
 455                        postprocessor/executable. The dict can also have "PP+EXE" keys
 456                        which are used when the given exe is used by the given PP.
 457                        Use 'default' as the name for arguments to be passed to all PP
 458                        For compatibility with youtube-dl, a single list of args
 459                        can also be used
 460
 461     The following options are used by the extractors:
 462     extractor_retries: Number of times to retry for known errors
 463     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 464     hls_split_discontinuity: Split HLS playlists to different formats at
 465                        discontinuities such as ad breaks (default: False)
 466     extractor_args:    A dictionary of arguments to be passed to the extractors.
 467                        See "EXTRACTOR ARGUMENTS" for details.
 468                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 469     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 470
 471     The following options are deprecated and may be removed in the future:
 472
 473     playliststart:     - Use playlist_items
 474                        Playlist item to start at.
 475     playlistend:       - Use playlist_items
 476                        Playlist item to end at.
 477     playlistreverse:   - Use playlist_items
 478                        Download playlist items in reverse order.
 479     forceurl:          - Use forceprint
 480                        Force printing final URL.
 481     forcetitle:        - Use forceprint
 482                        Force printing title.
 483     forceid:           - Use forceprint
 484                        Force printing ID.
 485     forcethumbnail:    - Use forceprint
 486                        Force printing thumbnail URL.
 487     forcedescription:  - Use forceprint
 488                        Force printing description.
 489     forcefilename:     - Use forceprint
 490                        Force printing final filename.
 491     forceduration:     - Use forceprint
 492                        Force printing duration.
 493     allsubtitles:      - Use subtitleslangs = ['all']
 494                        Downloads all the subtitles of the video
 495                        (requires writesubtitles or writeautomaticsub)
 496     include_ads:       - Doesn't work
 497                        Download ads as well
 498     call_home:         - Not implemented
 499                        Boolean, true iff we are allowed to contact the
 500                        yt-dlp servers for debugging.
 501     post_hooks:        - Register a custom postprocessor
 502                        A list of functions that get called as the final step
 503                        for each video file, after all postprocessors have been
 504                        called. The filename will be passed as the only argument.
 505     hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
 506                        Use the native HLS downloader instead of ffmpeg/avconv
 507                        if True, otherwise use ffmpeg/avconv if False, otherwise
 508                        use downloader suggested by extractor if None.
 509     prefer_ffmpeg:     - avconv support is deprecated
 510                        If False, use avconv instead of ffmpeg if both are available,
 511                        otherwise prefer ffmpeg.
 512     youtube_include_dash_manifest: - Use extractor_args
 513                        If True (default), DASH manifests and related
 514                        data will be downloaded and processed by extractor.
 515                        You can reduce network I/O by disabling it if you don't
 516                        care about DASH. (only for youtube)
 517     youtube_include_hls_manifest: - Use extractor_args
 518                        If True (default), HLS manifests and related
 519                        data will be downloaded and processed by extractor.
 520                        You can reduce network I/O by disabling it if you don't
 521                        care about HLS. (only for youtube)
 522     """
 523
 524     _NUMERIC_FIELDS = {  # field names regarded as numeric, going by the attribute name; where this set is consumed is outside this view -- TODO confirm
 525         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 526         'timestamp', 'release_timestamp',
 527         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 528         'average_rating', 'comment_count', 'age_limit',
 529         'start_time', 'end_time',
 530         'chapter_number', 'season_number', 'episode_number',
 531         'track_number', 'disc_number', 'release_year',
 532     }
 533
 534     _format_fields = {  # presumably the keys that belong to a single format dict (see the NB on the next line) -- verify against extractor/common.py
 535         # NB: Keep in sync with the docstring of extractor/common.py
 536         'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
 537         'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
 538         'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
 539         'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
 540         'preference', 'language', 'language_preference', 'quality', 'source_preference',
 541         'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
 542         'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
 543     }
 544     _format_selection_exts = {  # file extensions grouped under 'audio' / 'video' / 'storyboards'; presumably consulted during format selection -- TODO confirm against the consumer
 545         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 546         'video': {'mp4', 'flv', 'webm', '3gp'},
 547         'storyboards': {'mhtml'},
 548     }
 549
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param params       Dictionary of options. It is stored as-is (an empty dict is
                            used when None) and is normalized in place by this method
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        # Registries and counters. The extractor, postprocessor and hook
        # collections are filled through the corresponding add_* methods
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        windows_enable_vt_mode()
        # Output streams: "out" is stderr when 'logtostderr' is set and "screen" is
        # stderr in 'quiet' mode. "console" is the first of stderr/stdout that supports
        # terminal sequences, and is always None when compat_os_name is 'nt'
        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )
        # Per-stream flag: colors are used only when 'no_color' is unset
        # and the stream supports terminal sequences
        self._allow_colors = Namespace(**{
            type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
            for type_, stream in self._out_files.items_ if type_ != 'console'
        })

        # Warn when running below MIN_RECOMMENDED; below MIN_SUPPORTED
        # the message states that the version is no longer supported
        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 6), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = ('Support for Python version %d.%d has been deprecated. '
                   'See  https://github.com/yt-dlp/yt-dlp/issues/3764  for more details.'
                   '\n                    You will no longer receive updates on this version')
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            self.deprecation_warning(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                '         If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        # Warn if a deprecated param is set (i.e. not None); returns whether it was set
        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        # The deprecated proxy option is only used when the new one is not given
        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        # Messages queued by the caller through params
        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecation_warning(msg)

        # compat_opts is always stored as a set
        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            # An explicit None is treated the same as the key being absent
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        # Send output through an external bidiv (or, failing that, fribidi) process
        # whose stdout is a pty; _bidi_workaround reads the result from the pty master
        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        # None, '-' and callables are kept as-is; anything else goes through build_format_selector
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        # Set http_headers defaults according to std_headers
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

        # Register the hooks that were passed through params
        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        # Each definition is copied so that popping 'when' and 'key' does not modify
        # the caller's dict; the remaining items become keyword arguments
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        self._setup_opener()
        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified
            @return    False if no file was given or it does not exist, True otherwise
            """
            if fn is None:
                return False
            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except OSError as ioe:
                # A missing archive file is not an error
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        # Stripped lines of the download archive file
        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))
 727
 728     def warn_if_short_id(self, argv):
 729         # short YouTube ID starting with dash?
 730         idxs = [
 731             i for i, a in enumerate(argv)
 732             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 733         if idxs:
 734             correct_argv = (
 735                 ['yt-dlp']
 736                 + [a for i, a in enumerate(argv) if i not in idxs]
 737                 + ['--'] + [argv[i] for i in idxs]
 738             )
 739             self.report_warning(
 740                 'Long argument string detected. '
 741                 'Use -- to separate parameters and URLs, like this:\n%s' %
 742                 args_to_str(correct_argv))
 743
 744     def add_info_extractor(self, ie):
 745         """Add an InfoExtractor object to the end of the list."""
 746         ie_key = ie.ie_key()
 747         self._ies[ie_key] = ie
 748         if not isinstance(ie, type):
 749             self._ies_instances[ie_key] = ie
 750             ie.set_downloader(self)
 751
 752     def _get_info_extractor_class(self, ie_key):
 753         ie = self._ies.get(ie_key)
 754         if ie is None:
 755             ie = get_info_extractor(ie_key)
 756             self.add_info_extractor(ie)
 757         return ie
 758
 759     def get_info_extractor(self, ie_key):
 760         """
 761         Get an instance of an IE with name ie_key, it will try to get one from
 762         the _ies list, if there's no instance it will create a new one and add
 763         it to the extractor list.
 764         """
 765         ie = self._ies_instances.get(ie_key)
 766         if ie is None:
 767             ie = get_info_extractor(ie_key)()
 768             self.add_info_extractor(ie)
 769         return ie
 770
 771     def add_default_info_extractors(self):
 772         """
 773         Add the InfoExtractors returned by gen_extractors to the end of the list
 774         """
 775         for ie in gen_extractor_classes():
 776             self.add_info_extractor(ie)
 777
 778     def add_post_processor(self, pp, when='post_process'):
 779         """Add a PostProcessor object to the end of the chain."""
 780         assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
 781         self._pps[when].append(pp)
 782         pp.set_downloader(self)
 783
 784     def add_post_hook(self, ph):
 785         """Add the post hook"""
 786         self._post_hooks.append(ph)
 787
 788     def add_progress_hook(self, ph):
 789         """Add the download progress hook"""
 790         self._progress_hooks.append(ph)
 791
 792     def add_postprocessor_hook(self, ph):
 793         """Add the postprocessing progress hook"""
 794         self._postprocessor_hooks.append(ph)
 795         for pps in self._pps.values():
 796             for pp in pps:
 797                 pp.add_progress_hook(ph)
 798
 799     def _bidi_workaround(self, message):
 800         if not hasattr(self, '_output_channel'):
 801             return message
 802
 803         assert hasattr(self, '_output_process')
 804         assert isinstance(message, str)
 805         line_count = message.count('\n') + 1
 806         self._output_process.stdin.write((message + '\n').encode())
 807         self._output_process.stdin.flush()
 808         res = ''.join(self._output_channel.readline().decode()
 809                       for _ in range(line_count))
 810         return res[:-len('\n')]
 811
 812     def _write_string(self, message, out=None, only_once=False):
 813         if only_once:
 814             if message in self._printed_messages:
 815                 return
 816             self._printed_messages.add(message)
 817         write_string(message, out=out, encoding=self.params.get('encoding'))
 818
 819     def to_stdout(self, message, skip_eol=False, quiet=None):
 820         """Print message to stdout"""
 821         if quiet is not None:
 822             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
 823         if skip_eol is not False:
 824             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
 825         self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
 826
 827     def to_screen(self, message, skip_eol=False, quiet=None):
 828         """Print message to screen if not in quiet mode"""
 829         if self.params.get('logger'):
 830             self.params['logger'].debug(message)
 831             return
 832         if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
 833             return
 834         self._write_string(
 835             '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 836             self._out_files.screen)
 837
 838     def to_stderr(self, message, only_once=False):
 839         """Print message to stderr"""
 840         assert isinstance(message, str)
 841         if self.params.get('logger'):
 842             self.params['logger'].error(message)
 843         else:
 844             self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
 845
 846     def _send_console_code(self, code):
 847         if compat_os_name == 'nt' or not self._out_files.console:
 848             return
 849         self._write_string(code, self._out_files.console)
 850
 851     def to_console_title(self, message):
 852         if not self.params.get('consoletitle', False):
 853             return
 854         message = remove_terminal_sequences(message)
 855         if compat_os_name == 'nt':
 856             if ctypes.windll.kernel32.GetConsoleWindow():
 857                 # c_wchar_p() might not be necessary if `message` is
 858                 # already of type unicode()
 859                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 860         else:
 861             self._send_console_code(f'\033]0;{message}\007')
 862
 863     def save_console_title(self):
 864         if not self.params.get('consoletitle') or self.params.get('simulate'):
 865             return
 866         self._send_console_code('\033[22;0t')  # Save the title on stack
 867
 868     def restore_console_title(self):
 869         if not self.params.get('consoletitle') or self.params.get('simulate'):
 870             return
 871         self._send_console_code('\033[23;0t')  # Restore the title from stack
 872
 873     def __enter__(self):
 874         self.save_console_title()
 875         return self
 876
 877     def __exit__(self, *args):
 878         self.restore_console_title()
 879
 880         if self.params.get('cookiefile') is not None:
 881             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 882
 883     def trouble(self, message=None, tb=None, is_error=True):
 884         """Determine action to take when a download problem appears.
 885
 886         Depending on if the downloader has been configured to ignore
 887         download errors or not, this method may throw an exception or
 888         not when errors are found, after printing the message.
 889
 890         @param tb          If given, is additional traceback information
 891         @param is_error    Whether to raise error according to ignorerrors
 892         """
 893         if message is not None:
 894             self.to_stderr(message)
 895         if self.params.get('verbose'):
 896             if tb is None:
 897                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 898                     tb = ''
 899                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 900                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 901                     tb += encode_compat_str(traceback.format_exc())
 902                 else:
 903                     tb_data = traceback.format_list(traceback.extract_stack())
 904                     tb = ''.join(tb_data)
 905             if tb:
 906                 self.to_stderr(tb)
 907         if not is_error:
 908             return
 909         if not self.params.get('ignoreerrors'):
 910             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 911                 exc_info = sys.exc_info()[1].exc_info
 912             else:
 913                 exc_info = sys.exc_info()
 914             raise DownloadError(message, exc_info)
 915         self._download_retcode = 1
 916
    # Named text styles that are passed to the _format_* helpers,
    # e.g. self._format_err("WARNING:", self.Styles.WARNING)
    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        WARNING='yellow',
        SUPPRESS='light black',
    )
 927
 928     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 929         text = str(text)
 930         if test_encoding:
 931             original_text = text
 932             # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
 933             encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
 934             text = text.encode(encoding, 'ignore').decode(encoding)
 935             if fallback is not None and text != original_text:
 936                 text = fallback
 937         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 938
 939     def _format_out(self, *args, **kwargs):
 940         return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
 941
 942     def _format_screen(self, *args, **kwargs):
 943         return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
 944
 945     def _format_err(self, *args, **kwargs):
 946         return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
 947
 948     def report_warning(self, message, only_once=False):
 949         '''
 950         Print the message to stderr, it will be prefixed with 'WARNING:'
 951         If stderr is a tty file the 'WARNING:' will be colored
 952         '''
 953         if self.params.get('logger') is not None:
 954             self.params['logger'].warning(message)
 955         else:
 956             if self.params.get('no_warnings'):
 957                 return
 958             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 959
 960     def deprecation_warning(self, message):
 961         if self.params.get('logger') is not None:
 962             self.params['logger'].warning(f'DeprecationWarning: {message}')
 963         else:
 964             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 965
 966     def report_error(self, message, *args, **kwargs):
 967         '''
 968         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 969         in red if stderr is a tty file.
 970         '''
 971         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 972
 973     def write_debug(self, message, only_once=False):
 974         '''Log debug message or Print message to stderr'''
 975         if not self.params.get('verbose', False):
 976             return
 977         message = f'[debug] {message}'
 978         if self.params.get('logger'):
 979             self.params['logger'].debug(message)
 980         else:
 981             self.to_stderr(message, only_once)
 982
 983     def report_file_already_downloaded(self, file_name):
 984         """Report file has already been fully downloaded."""
 985         try:
 986             self.to_screen('[download] %s has already been downloaded' % file_name)
 987         except UnicodeEncodeError:
 988             self.to_screen('[download] The file has already been downloaded')
 989
 990     def report_file_delete(self, file_name):
 991         """Report that existing file will be deleted."""
 992         try:
 993             self.to_screen('Deleting existing file %s' % file_name)
 994         except UnicodeEncodeError:
 995             self.to_screen('Deleting existing file')
 996
 997     def raise_no_formats(self, info, forced=False, *, msg=None):
 998         has_drm = info.get('_has_drm')
 999         ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
1000         msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
1001         if forced or not ignored:
1002             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
1003                                  expected=has_drm or ignored or expected)
1004         else:
1005             self.report_warning(msg)
1006
1007     def parse_outtmpl(self):
1008         self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
1009         self._parse_outtmpl()
1010         return self.params['outtmpl']
1011
1012     def _parse_outtmpl(self):
1013         sanitize = IDENTITY
1014         if self.params.get('restrictfilenames'):  # Remove spaces in the default template
1015             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
1016
1017         outtmpl = self.params.setdefault('outtmpl', {})
1018         if not isinstance(outtmpl, dict):
1019             self.params['outtmpl'] = outtmpl = {'default': outtmpl}
1020         outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})
1021
1022     def get_output_path(self, dir_type='', filename=None):
1023         paths = self.params.get('paths', {})
1024         assert isinstance(paths, dict)
1025         path = os.path.join(
1026             expand_path(paths.get('home', '').strip()),
1027             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1028             filename or '')
1029         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1030
1031     @staticmethod
1032     def _outtmpl_expandpath(outtmpl):
1033         # expand_path translates '%%' into '%' and '$$' into '$'
1034         # correspondingly that is not what we want since we need to keep
1035         # '%%' intact for template dict substitution step. Working around
1036         # with boundary-alike separator hack.
1037         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1038         outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
1039
1040         # outtmpl should be expand_path'ed before template dict substitution
1041         # because meta fields may contain env variables we don't want to
1042         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1043         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1044         return expand_path(outtmpl).replace(sep, '')
1045
1046     @staticmethod
1047     def escape_outtmpl(outtmpl):
1048         ''' Escape any remaining strings like %s, %abc% etc. '''
1049         return re.sub(
1050             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1051             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1052             outtmpl)
1053
1054     @classmethod
1055     def validate_outtmpl(cls, outtmpl):
1056         ''' @return None or Exception object '''
1057         outtmpl = re.sub(
1058             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
1059             lambda mobj: f'{mobj.group(0)[:-1]}s',
1060             cls._outtmpl_expandpath(outtmpl))
1061         try:
1062             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1063             return None
1064         except ValueError as err:
1065             return err
1066
1067     @staticmethod
1068     def _copy_infodict(info_dict):
1069         info_dict = dict(info_dict)
1070         info_dict.pop('__postprocessors', None)
1071         info_dict.pop('__pending_error', None)
1072         return info_dict
1073
1074     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1075         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1076         @param sanitize    Whether to sanitize the output as a filename.
1077                            For backward compatibility, a function can also be passed
1078         """
1079
1080         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1081
1082         info_dict = self._copy_infodict(info_dict)
1083         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1084             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1085             if info_dict.get('duration', None) is not None
1086             else None)
1087         info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
1088         info_dict['video_autonumber'] = self._num_videos
1089         if info_dict.get('resolution') is None:
1090             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1091
1092         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1093         # of %(field)s to %(field)0Nd for backward compatibility
1094         field_size_compat_map = {
1095             'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
1096             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1097             'autonumber': self.params.get('autonumber_size') or 5,
1098         }
1099
1100         TMPL_DICT = {}
1101         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
1102         MATH_FUNCTIONS = {
1103             '+': float.__add__,
1104             '-': float.__sub__,
1105         }
1106         # Field is of the form key1.key2...
1107         # where keys (except first) can be string, int or slice
1108         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
1109         MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
1110         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1111         INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
1112             (?P<negate>-)?
1113             (?P<fields>{FIELD_RE})
1114             (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
1115             (?:>(?P<strf_format>.+?))?
1116             (?P<remaining>
1117                 (?P<alternate>(?<!\\),[^|&)]+)?
1118                 (?:&(?P<replacement>.*?))?
1119                 (?:\|(?P<default>.*?))?
1120             )$''')
1121
1122         def _traverse_infodict(k):
1123             k = k.split('.')
1124             if k[0] == '':
1125                 k.pop(0)
1126             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
1127
1128         def get_value(mdict):
1129             # Object traversal
1130             value = _traverse_infodict(mdict['fields'])
1131             # Negative
1132             if mdict['negate']:
1133                 value = float_or_none(value)
1134                 if value is not None:
1135                     value *= -1
1136             # Do maths
1137             offset_key = mdict['maths']
1138             if offset_key:
1139                 value = float_or_none(value)
1140                 operator = None
1141                 while offset_key:
1142                     item = re.match(
1143                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1144                         offset_key).group(0)
1145                     offset_key = offset_key[len(item):]
1146                     if operator is None:
1147                         operator = MATH_FUNCTIONS[item]
1148                         continue
1149                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1150                     offset = float_or_none(item)
1151                     if offset is None:
1152                         offset = float_or_none(_traverse_infodict(item))
1153                     try:
1154                         value = operator(value, multiplier * offset)
1155                     except (TypeError, ZeroDivisionError):
1156                         return None
1157                     operator = None
1158             # Datetime formatting
1159             if mdict['strf_format']:
1160                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1161
1162             return value
1163
1164         na = self.params.get('outtmpl_na_placeholder', 'NA')
1165
1166         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1167             return sanitize_filename(str(value), restricted=restricted, is_id=(
1168                 bool(re.search(r'(^|[_.])id(\.|$)', key))
1169                 if 'filename-sanitization' in self.params['compat_opts']
1170                 else NO_DEFAULT))
1171
1172         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1173         sanitize = bool(sanitize)
1174
1175         def _dumpjson_default(obj):
1176             if isinstance(obj, (set, LazyList)):
1177                 return list(obj)
1178             return repr(obj)
1179
1180         def create_key(outer_mobj):
1181             if not outer_mobj.group('has_key'):
1182                 return outer_mobj.group(0)
1183             key = outer_mobj.group('key')
1184             mobj = re.match(INTERNAL_FORMAT_RE, key)
1185             initial_field = mobj.group('fields') if mobj else ''
1186             value, replacement, default = None, None, na
1187             while mobj:
1188                 mobj = mobj.groupdict()
1189                 default = mobj['default'] if mobj['default'] is not None else default
1190                 value = get_value(mobj)
1191                 replacement = mobj['replacement']
1192                 if value is None and mobj['alternate']:
1193                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
1194                 else:
1195                     break
1196
1197             fmt = outer_mobj.group('format')
1198             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1199                 fmt = f'0{field_size_compat_map[key]:d}d'
1200
1201             value = default if value is None else value if replacement is None else replacement
1202
1203             flags = outer_mobj.group('conversion') or ''
1204             str_fmt = f'{fmt[:-1]}s'
1205             if fmt[-1] == 'l':  # list
1206                 delim = '\n' if '#' in flags else ', '
1207                 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1208             elif fmt[-1] == 'j':  # json
1209                 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
1210             elif fmt[-1] == 'h':  # html
1211                 value, fmt = escapeHTML(value), str_fmt
1212             elif fmt[-1] == 'q':  # quoted
1213                 value = map(str, variadic(value) if '#' in flags else [value])
1214                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1215             elif fmt[-1] == 'B':  # bytes
1216                 value = f'%{str_fmt}'.encode() % str(value).encode()
1217                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1218             elif fmt[-1] == 'U':  # unicode normalized
1219                 value, fmt = unicodedata.normalize(
1220                     # "+" = compatibility equivalence, "#" = NFD
1221                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1222                     value), str_fmt
1223             elif fmt[-1] == 'D':  # decimal suffix
1224                 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1225                 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1226                                               factor=1024 if '#' in flags else 1000)
1227             elif fmt[-1] == 'S':  # filename sanitization
1228                 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
1229             elif fmt[-1] == 'c':
1230                 if value:
1231                     value = str(value)[0]
1232                 else:
1233                     fmt = str_fmt
1234             elif fmt[-1] not in 'rs':  # numeric
1235                 value = float_or_none(value)
1236                 if value is None:
1237                     value, fmt = default, 's'
1238
1239             if sanitize:
1240                 if fmt[-1] == 'r':
1241                     # If value is an object, sanitize might convert it to a string
1242                     # So we convert it to repr first
1243                     value, fmt = repr(value), str_fmt
1244                 if fmt[-1] in 'csr':
1245                     value = sanitizer(initial_field, value)
1246
1247             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1248             TMPL_DICT[key] = value
1249             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1250
1251         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1252
1253     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1254         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1255         return self.escape_outtmpl(outtmpl) % info_dict
1256
1257     def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1258         assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1259         if outtmpl is None:
1260             outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1261         try:
1262             outtmpl = self._outtmpl_expandpath(outtmpl)
1263             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1264             if not filename:
1265                 return None
1266
1267             if tmpl_type in ('', 'temp'):
1268                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1269                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1270                     filename = replace_extension(filename, ext, final_ext)
1271             elif tmpl_type:
1272                 force_ext = OUTTMPL_TYPES[tmpl_type]
1273                 if force_ext:
1274                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1275
1276             # https://github.com/blackjack4494/youtube-dlc/issues/85
1277             trim_file_name = self.params.get('trim_file_name', False)
1278             if trim_file_name:
1279                 no_ext, *ext = filename.rsplit('.', 2)
1280                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1281
1282             return filename
1283         except ValueError as err:
1284             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1285             return None
1286
1287     def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1288         """Generate the output filename"""
1289         if outtmpl:
1290             assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1291             dir_type = None
1292         filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1293         if not filename and dir_type not in ('', 'temp'):
1294             return ''
1295
1296         if warn:
1297             if not self.params.get('paths'):
1298                 pass
1299             elif filename == '-':
1300                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1301             elif os.path.isabs(filename):
1302                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1303         if filename == '-' or not filename:
1304             return filename
1305
1306         return self.get_output_path(dir_type, filename)
1307
1308     def _match_entry(self, info_dict, incomplete=False, silent=False):
1309         """ Returns None if the file should be downloaded """
1310
1311         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1312
1313         def check_filter():
1314             if 'title' in info_dict:
1315                 # This can happen when we're just evaluating the playlist
1316                 title = info_dict['title']
1317                 matchtitle = self.params.get('matchtitle', False)
1318                 if matchtitle:
1319                     if not re.search(matchtitle, title, re.IGNORECASE):
1320                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1321                 rejecttitle = self.params.get('rejecttitle', False)
1322                 if rejecttitle:
1323                     if re.search(rejecttitle, title, re.IGNORECASE):
1324                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1325             date = info_dict.get('upload_date')
1326             if date is not None:
1327                 dateRange = self.params.get('daterange', DateRange())
1328                 if date not in dateRange:
1329                     return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1330             view_count = info_dict.get('view_count')
1331             if view_count is not None:
1332                 min_views = self.params.get('min_views')
1333                 if min_views is not None and view_count < min_views:
1334                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1335                 max_views = self.params.get('max_views')
1336                 if max_views is not None and view_count > max_views:
1337                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1338             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1339                 return 'Skipping "%s" because it is age restricted' % video_title
1340
1341             match_filter = self.params.get('match_filter')
1342             if match_filter is not None:
1343                 try:
1344                     ret = match_filter(info_dict, incomplete=incomplete)
1345                 except TypeError:
1346                     # For backward compatibility
1347                     ret = None if incomplete else match_filter(info_dict)
1348                 if ret is NO_DEFAULT:
1349                     while True:
1350                         filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1351                         reply = input(self._format_screen(
1352                             f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1353                         if reply in {'y', ''}:
1354                             return None
1355                         elif reply == 'n':
1356                             return f'Skipping {video_title}'
1357                 elif ret is not None:
1358                     return ret
1359             return None
1360
1361         if self.in_download_archive(info_dict):
1362             reason = '%s has already been recorded in the archive' % video_title
1363             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1364         else:
1365             reason = check_filter()
1366             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1367         if reason is not None:
1368             if not silent:
1369                 self.to_screen('[download] ' + reason)
1370             if self.params.get(break_opt, False):
1371                 raise break_err()
1372         return reason
1373
1374     @staticmethod
1375     def add_extra_info(info_dict, extra_info):
1376         '''Set the keys from extra_info in info dict if they are missing'''
1377         for key, value in extra_info.items():
1378             info_dict.setdefault(key, value)
1379
1380     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1381                      process=True, force_generic_extractor=False):
1382         """
1383         Return a list with a dictionary for each video extracted.
1384
1385         Arguments:
1386         url -- URL to extract
1387
1388         Keyword arguments:
1389         download -- whether to download videos during extraction
1390         ie_key -- extractor key hint
1391         extra_info -- dictionary containing the extra values to add to each result
1392         process -- whether to resolve all unresolved references (URLs, playlist items),
1393             must be True for download to work.
1394         force_generic_extractor -- force using the generic extractor
1395         """
1396
1397         if extra_info is None:
1398             extra_info = {}
1399
1400         if not ie_key and force_generic_extractor:
1401             ie_key = 'Generic'
1402
1403         if ie_key:
1404             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1405         else:
1406             ies = self._ies
1407
1408         for ie_key, ie in ies.items():
1409             if not ie.suitable(url):
1410                 continue
1411
1412             if not ie.working():
1413                 self.report_warning('The program functionality for this site has been marked as broken, '
1414                                     'and will probably not work.')
1415
1416             temp_id = ie.get_temp_id(url)
1417             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1418                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1419                 if self.params.get('break_on_existing', False):
1420                     raise ExistingVideoReached()
1421                 break
1422             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1423         else:
1424             self.report_error('no suitable InfoExtractor for URL %s' % url)
1425
1426     def _handle_extraction_exceptions(func):
1427         @functools.wraps(func)
1428         def wrapper(self, *args, **kwargs):
1429             while True:
1430                 try:
1431                     return func(self, *args, **kwargs)
1432                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1433                     raise
1434                 except ReExtractInfo as e:
1435                     if e.expected:
1436                         self.to_screen(f'{e}; Re-extracting data')
1437                     else:
1438                         self.to_stderr('\r')
1439                         self.report_warning(f'{e}; Re-extracting data')
1440                     continue
1441                 except GeoRestrictedError as e:
1442                     msg = e.msg
1443                     if e.countries:
1444                         msg += '\nThis video is available in %s.' % ', '.join(
1445                             map(ISO3166Utils.short2full, e.countries))
1446                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1447                     self.report_error(msg)
1448                 except ExtractorError as e:  # An error we somewhat expected
1449                     self.report_error(str(e), e.format_traceback())
1450                 except Exception as e:
1451                     if self.params.get('ignoreerrors'):
1452                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1453                     else:
1454                         raise
1455                 break
1456         return wrapper
1457
1458     def _wait_for_video(self, ie_result):
1459         if (not self.params.get('wait_for_video')
1460                 or ie_result.get('_type', 'video') != 'video'
1461                 or ie_result.get('formats') or ie_result.get('url')):
1462             return
1463
1464         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1465         last_msg = ''
1466
1467         def progress(msg):
1468             nonlocal last_msg
1469             full_msg = f'{msg}\n'
1470             if not self.params.get('noprogress'):
1471                 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1472             elif last_msg:
1473                 return
1474             self.to_screen(full_msg, skip_eol=True)
1475             last_msg = msg
1476
1477         min_wait, max_wait = self.params.get('wait_for_video')
1478         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1479         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1480             diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1481             self.report_warning('Release time of video is not known')
1482         elif (diff or 0) <= 0:
1483             self.report_warning('Video should already be available according to extracted info')
1484         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1485         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1486
1487         wait_till = time.time() + diff
1488         try:
1489             while True:
1490                 diff = wait_till - time.time()
1491                 if diff <= 0:
1492                     progress('')
1493                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1494                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1495                 time.sleep(1)
1496         except KeyboardInterrupt:
1497             progress('')
1498             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1499         except BaseException as e:
1500             if not isinstance(e, ReExtractInfo):
1501                 self.to_screen('')
1502             raise
1503
1504     @_handle_extraction_exceptions
1505     def __extract_info(self, url, ie, download, extra_info, process):
1506         ie_result = ie.extract(url)
1507         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1508             self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1509             return
1510         if isinstance(ie_result, list):
1511             # Backwards compatibility: old IE result format
1512             ie_result = {
1513                 '_type': 'compat_list',
1514                 'entries': ie_result,
1515             }
1516         if extra_info.get('original_url'):
1517             ie_result.setdefault('original_url', extra_info['original_url'])
1518         self.add_default_extra_info(ie_result, ie, url)
1519         if process:
1520             self._wait_for_video(ie_result)
1521             return self.process_ie_result(ie_result, download, extra_info)
1522         else:
1523             return ie_result
1524
1525     def add_default_extra_info(self, ie_result, ie, url):
1526         if url is not None:
1527             self.add_extra_info(ie_result, {
1528                 'webpage_url': url,
1529                 'original_url': url,
1530             })
1531         webpage_url = ie_result.get('webpage_url')
1532         if webpage_url:
1533             self.add_extra_info(ie_result, {
1534                 'webpage_url_basename': url_basename(webpage_url),
1535                 'webpage_url_domain': get_domain(webpage_url),
1536             })
1537         if ie is not None:
1538             self.add_extra_info(ie_result, {
1539                 'extractor': ie.IE_NAME,
1540                 'extractor_key': ie.ie_key(),
1541             })
1542
1543     def process_ie_result(self, ie_result, download=True, extra_info=None):
1544         """
1545         Take the result of the ie(may be modified) and resolve all unresolved
1546         references (URLs, playlist items).
1547
1548         It will also download the videos if 'download'.
1549         Returns the resolved ie_result.
1550         """
1551         if extra_info is None:
1552             extra_info = {}
1553         result_type = ie_result.get('_type', 'video')
1554
1555         if result_type in ('url', 'url_transparent'):
1556             ie_result['url'] = sanitize_url(ie_result['url'])
1557             if ie_result.get('original_url'):
1558                 extra_info.setdefault('original_url', ie_result['original_url'])
1559
1560             extract_flat = self.params.get('extract_flat', False)
1561             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1562                     or extract_flat is True):
1563                 info_copy = ie_result.copy()
1564                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1565                 if ie and not ie_result.get('id'):
1566                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1567                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1568                 self.add_extra_info(info_copy, extra_info)
1569                 info_copy, _ = self.pre_process(info_copy)
1570                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1571                 self._raise_pending_errors(info_copy)
1572                 if self.params.get('force_write_download_archive', False):
1573                     self.record_download_archive(info_copy)
1574                 return ie_result
1575
1576         if result_type == 'video':
1577             self.add_extra_info(ie_result, extra_info)
1578             ie_result = self.process_video_result(ie_result, download=download)
1579             self._raise_pending_errors(ie_result)
1580             additional_urls = (ie_result or {}).get('additional_urls')
1581             if additional_urls:
1582                 # TODO: Improve MetadataParserPP to allow setting a list
1583                 if isinstance(additional_urls, str):
1584                     additional_urls = [additional_urls]
1585                 self.to_screen(
1586                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1587                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1588                 ie_result['additional_entries'] = [
1589                     self.extract_info(
1590                         url, download, extra_info=extra_info,
1591                         force_generic_extractor=self.params.get('force_generic_extractor'))
1592                     for url in additional_urls
1593                 ]
1594             return ie_result
1595         elif result_type == 'url':
1596             # We have to add extra_info to the results because it may be
1597             # contained in a playlist
1598             return self.extract_info(
1599                 ie_result['url'], download,
1600                 ie_key=ie_result.get('ie_key'),
1601                 extra_info=extra_info)
1602         elif result_type == 'url_transparent':
1603             # Use the information from the embedding page
1604             info = self.extract_info(
1605                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1606                 extra_info=extra_info, download=False, process=False)
1607
1608             # extract_info may return None when ignoreerrors is enabled and
1609             # extraction failed with an error, don't crash and return early
1610             # in this case
1611             if not info:
1612                 return info
1613
1614             exempted_fields = {'_type', 'url', 'ie_key'}
1615             if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1616                 # For video clips, the id etc of the clip extractor should be used
1617                 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1618
1619             new_result = info.copy()
1620             new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1621
1622             # Extracted info may not be a video result (i.e.
1623             # info.get('_type', 'video') != video) but rather an url or
1624             # url_transparent. In such cases outer metadata (from ie_result)
1625             # should be propagated to inner one (info). For this to happen
1626             # _type of info should be overridden with url_transparent. This
1627             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1628             if new_result.get('_type') == 'url':
1629                 new_result['_type'] = 'url_transparent'
1630
1631             return self.process_ie_result(
1632                 new_result, download=download, extra_info=extra_info)
1633         elif result_type in ('playlist', 'multi_video'):
1634             # Protect from infinite recursion due to recursively nested playlists
1635             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1636             webpage_url = ie_result['webpage_url']
1637             if webpage_url in self._playlist_urls:
1638                 self.to_screen(
1639                     '[download] Skipping already downloaded playlist: %s'
1640                     % ie_result.get('title') or ie_result.get('id'))
1641                 return
1642
1643             self._playlist_level += 1
1644             self._playlist_urls.add(webpage_url)
1645             self._fill_common_fields(ie_result, False)
1646             self._sanitize_thumbnails(ie_result)
1647             try:
1648                 return self.__process_playlist(ie_result, download)
1649             finally:
1650                 self._playlist_level -= 1
1651                 if not self._playlist_level:
1652                     self._playlist_urls.clear()
1653         elif result_type == 'compat_list':
1654             self.report_warning(
1655                 'Extractor %s returned a compat_list result. '
1656                 'It needs to be updated.' % ie_result.get('extractor'))
1657
1658             def _fixup(r):
1659                 self.add_extra_info(r, {
1660                     'extractor': ie_result['extractor'],
1661                     'webpage_url': ie_result['webpage_url'],
1662                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1663                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1664                     'extractor_key': ie_result['extractor_key'],
1665                 })
1666                 return r
1667             ie_result['entries'] = [
1668                 self.process_ie_result(_fixup(r), download, extra_info)
1669                 for r in ie_result['entries']
1670             ]
1671             return ie_result
1672         else:
1673             raise Exception('Invalid result type: %s' % result_type)
1674
1675     def _ensure_dir_exists(self, path):
1676         return make_dir(path, self.report_error)
1677
1678     @staticmethod
1679     def _playlist_infodict(ie_result, **kwargs):
1680         return {
1681             **ie_result,
1682             'playlist': ie_result.get('title') or ie_result.get('id'),
1683             'playlist_id': ie_result.get('id'),
1684             'playlist_title': ie_result.get('title'),
1685             'playlist_uploader': ie_result.get('uploader'),
1686             'playlist_uploader_id': ie_result.get('uploader_id'),
1687             'playlist_index': 0,
1688             **kwargs,
1689         }
1690
    def __process_playlist(self, ie_result, download):
        """Process each entry in the playlist

        Resolves the requested entries of a 'playlist' or 'multi_video'
        result, optionally writes the playlist-level files (infojson,
        description, thumbnails), passes every entry that is not filtered out
        to process_ie_result (through __process_iterable_entry) and finally
        runs the 'playlist' postprocessors.

        ie_result is updated in place ('entries', 'requested_entries',
        'playlist_count').

        Returns the result of run_all_pps('playlist', ...), or None if writing
        the playlist infojson or description returned None.
        """
        assert ie_result['_type'] in ('playlist', 'multi_video')

        title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
        self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

        all_entries = PlaylistEntries(self, ie_result)
        entries = orderedSet(all_entries.get_requested_items(), lazy=True)

        lazy = self.params.get('lazy_playlist')
        if lazy:
            # The entries are only collected while iterating in the loop below,
            # so their number is not known yet
            resolved_entries, n_entries = [], 'N/A'
            ie_result['requested_entries'], ie_result['entries'] = None, None
        else:
            # Both names refer to the same list, so the in-place reverse/shuffle
            # below is also visible through resolved_entries
            entries = resolved_entries = list(entries)
            n_entries = len(resolved_entries)
            # Split the (playlist_index, entry) pairs into two parallel tuples
            ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
        if not ie_result.get('playlist_count'):
            # Better to do this after potentially exhausting entries
            ie_result['playlist_count'] = all_entries.get_full_count()

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            # ie_copy is only bound inside this branch; it is used again further
            # down only when _infojson_written is True, which can only be set here
            ie_copy = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if lazy:
            if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
                self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
        elif self.params.get('playlistreverse'):
            entries.reverse()
        elif self.params.get('playlistrandom'):
            random.shuffle(entries)

        self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
                       f'{format_field(ie_result, "playlist_count", " of %s")}')

        # Processed entries are kept unless extract_flat is 'discard', or
        # 'discard_in_playlist' and the result is of type 'playlist'
        keep_resolved_entries = self.params.get('extract_flat') != 'discard'
        if self.params.get('extract_flat') == 'discard_in_playlist':
            keep_resolved_entries = ie_result['_type'] != 'playlist'
        if keep_resolved_entries:
            self.write_debug('The information of all playlist entries will be held in memory')

        failures = 0
        # A falsy skip_playlist_after_errors means there is no failure limit
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, (playlist_index, entry) in enumerate(entries):
            if lazy:
                # Appending on every iteration keeps index i valid for the
                # resolved_entries[i] assignment at the end of the loop body
                resolved_entries.append((playlist_index, entry))

            # TODO: Add auto-generated fields
            if not entry or self._match_entry(entry, incomplete=True) is not None:
                continue

            self.to_screen('[download] Downloading video %s of %s' % (
                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

            entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
            if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
                playlist_index = ie_result['requested_entries'][i]

            # The dictionary below is the extra_info handed to process_ie_result
            entry_result = self.__process_iterable_entry(entry, download, {
                'n_entries': int_or_none(n_entries),
                # requested_entries is None in lazy mode, hence the (0, 0) fallback
                '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
                'playlist_count': ie_result.get('playlist_count'),
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
                'playlist': title,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            # A falsy result counts as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
                break
            if keep_resolved_entries:
                resolved_entries[i] = (playlist_index, entry_result)

        # Update with processed data
        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])

        # Write the updated info to json
        if _infojson_written is True and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {title}')
        return ie_result
1801
1802     @_handle_extraction_exceptions
1803     def __process_iterable_entry(self, entry, download, extra_info):
1804         return self.process_ie_result(
1805             entry, download=download, extra_info=extra_info)
1806
1807     def _build_format_filter(self, filter_spec):
1808         " Returns a function to filter the formats according to the filter_spec "
1809
1810         OPERATORS = {
1811             '<': operator.lt,
1812             '<=': operator.le,
1813             '>': operator.gt,
1814             '>=': operator.ge,
1815             '=': operator.eq,
1816             '!=': operator.ne,
1817         }
1818         operator_rex = re.compile(r'''(?x)\s*
1819             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1820             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1821             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1822             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1823         m = operator_rex.fullmatch(filter_spec)
1824         if m:
1825             try:
1826                 comparison_value = int(m.group('value'))
1827             except ValueError:
1828                 comparison_value = parse_filesize(m.group('value'))
1829                 if comparison_value is None:
1830                     comparison_value = parse_filesize(m.group('value') + 'B')
1831                 if comparison_value is None:
1832                     raise ValueError(
1833                         'Invalid value %r in format specification %r' % (
1834                             m.group('value'), filter_spec))
1835             op = OPERATORS[m.group('op')]
1836
1837         if not m:
1838             STR_OPERATORS = {
1839                 '=': operator.eq,
1840                 '^=': lambda attr, value: attr.startswith(value),
1841                 '$=': lambda attr, value: attr.endswith(value),
1842                 '*=': lambda attr, value: value in attr,
1843                 '~=': lambda attr, value: value.search(attr) is not None
1844             }
1845             str_operator_rex = re.compile(r'''(?x)\s*
1846                 (?P<key>[a-zA-Z0-9._-]+)\s*
1847                 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1848                 (?P<quote>["'])?
1849                 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1850                 (?(quote)(?P=quote))\s*
1851                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1852             m = str_operator_rex.fullmatch(filter_spec)
1853             if m:
1854                 if m.group('op') == '~=':
1855                     comparison_value = re.compile(m.group('value'))
1856                 else:
1857                     comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1858                 str_op = STR_OPERATORS[m.group('op')]
1859                 if m.group('negation'):
1860                     op = lambda attr, value: not str_op(attr, value)
1861                 else:
1862                     op = str_op
1863
1864         if not m:
1865             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1866
1867         def _filter(f):
1868             actual_value = f.get(m.group('key'))
1869             if actual_value is None:
1870                 return m.group('none_inclusive')
1871             return op(actual_value, comparison_value)
1872         return _filter
1873
1874     def _check_formats(self, formats):
1875         for f in formats:
1876             self.to_screen('[info] Testing format %s' % f['format_id'])
1877             path = self.get_output_path('temp')
1878             if not self._ensure_dir_exists(f'{path}/'):
1879                 continue
1880             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1881             temp_file.close()
1882             try:
1883                 success, _ = self.dl(temp_file.name, f, test=True)
1884             except (DownloadError, OSError, ValueError) + network_exceptions:
1885                 success = False
1886             finally:
1887                 if os.path.exists(temp_file.name):
1888                     try:
1889                         os.remove(temp_file.name)
1890                     except OSError:
1891                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1892             if success:
1893                 yield f
1894             else:
1895                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1896
1897     def _default_format_spec(self, info_dict, download=True):
1898
1899         def can_merge():
1900             merger = FFmpegMergerPP(self)
1901             return merger.available and merger.can_merge()
1902
1903         prefer_best = (
1904             not self.params.get('simulate')
1905             and download
1906             and (
1907                 not can_merge()
1908                 or info_dict.get('is_live') and not self.params.get('live_from_start')
1909                 or self.params['outtmpl']['default'] == '-'))
1910         compat = (
1911             prefer_best
1912             or self.params.get('allow_multiple_audio_streams', False)
1913             or 'format-spec' in self.params['compat_opts'])
1914
1915         return (
1916             'best/bestvideo+bestaudio' if prefer_best
1917             else 'bestvideo*+bestaudio/best' if not compat
1918             else 'bestvideo+bestaudio/best')
1919
1920     def build_format_selector(self, format_spec):
1921         def syntax_error(note, start):
1922             message = (
1923                 'Invalid format specification: '
1924                 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
1925             return SyntaxError(message)
1926
1927         PICKFIRST = 'PICKFIRST'
1928         MERGE = 'MERGE'
1929         SINGLE = 'SINGLE'
1930         GROUP = 'GROUP'
1931         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1932
1933         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1934                                   'video': self.params.get('allow_multiple_video_streams', False)}
1935
1936         check_formats = self.params.get('check_formats') == 'selected'
1937
1938         def _parse_filter(tokens):
1939             filter_parts = []
1940             for type, string, start, _, _ in tokens:
1941                 if type == tokenize.OP and string == ']':
1942                     return ''.join(filter_parts)
1943                 else:
1944                     filter_parts.append(string)
1945
1946         def _remove_unused_ops(tokens):
1947             # Remove operators that we don't use and join them with the surrounding strings
1948             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1949             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1950             last_string, last_start, last_end, last_line = None, None, None, None
1951             for type, string, start, end, line in tokens:
1952                 if type == tokenize.OP and string == '[':
1953                     if last_string:
1954                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1955                         last_string = None
1956                     yield type, string, start, end, line
1957                     # everything inside brackets will be handled by _parse_filter
1958                     for type, string, start, end, line in tokens:
1959                         yield type, string, start, end, line
1960                         if type == tokenize.OP and string == ']':
1961                             break
1962                 elif type == tokenize.OP and string in ALLOWED_OPS:
1963                     if last_string:
1964                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1965                         last_string = None
1966                     yield type, string, start, end, line
1967                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1968                     if not last_string:
1969                         last_string = string
1970                         last_start = start
1971                         last_end = end
1972                     else:
1973                         last_string += string
1974             if last_string:
1975                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1976
1977         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1978             selectors = []
1979             current_selector = None
1980             for type, string, start, _, _ in tokens:
1981                 # ENCODING is only defined in python 3.x
1982                 if type == getattr(tokenize, 'ENCODING', None):
1983                     continue
1984                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1985                     current_selector = FormatSelector(SINGLE, string, [])
1986                 elif type == tokenize.OP:
1987                     if string == ')':
1988                         if not inside_group:
1989                             # ')' will be handled by the parentheses group
1990                             tokens.restore_last_token()
1991                         break
1992                     elif inside_merge and string in ['/', ',']:
1993                         tokens.restore_last_token()
1994                         break
1995                     elif inside_choice and string == ',':
1996                         tokens.restore_last_token()
1997                         break
1998                     elif string == ',':
1999                         if not current_selector:
2000                             raise syntax_error('"," must follow a format selector', start)
2001                         selectors.append(current_selector)
2002                         current_selector = None
2003                     elif string == '/':
2004                         if not current_selector:
2005                             raise syntax_error('"/" must follow a format selector', start)
2006                         first_choice = current_selector
2007                         second_choice = _parse_format_selection(tokens, inside_choice=True)
2008                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2009                     elif string == '[':
2010                         if not current_selector:
2011                             current_selector = FormatSelector(SINGLE, 'best', [])
2012                         format_filter = _parse_filter(tokens)
2013                         current_selector.filters.append(format_filter)
2014                     elif string == '(':
2015                         if current_selector:
2016                             raise syntax_error('Unexpected "("', start)
2017                         group = _parse_format_selection(tokens, inside_group=True)
2018                         current_selector = FormatSelector(GROUP, group, [])
2019                     elif string == '+':
2020                         if not current_selector:
2021                             raise syntax_error('Unexpected "+"', start)
2022                         selector_1 = current_selector
2023                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2024                         if not selector_2:
2025                             raise syntax_error('Expected a selector', start)
2026                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2027                     else:
2028                         raise syntax_error(f'Operator not recognized: "{string}"', start)
2029                 elif type == tokenize.ENDMARKER:
2030                     break
2031             if current_selector:
2032                 selectors.append(current_selector)
2033             return selectors
2034
2035         def _merge(formats_pair):
2036             format_1, format_2 = formats_pair
2037
2038             formats_info = []
2039             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2040             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2041
2042             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2043                 get_no_more = {'video': False, 'audio': False}
2044                 for (i, fmt_info) in enumerate(formats_info):
2045                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2046                         formats_info.pop(i)
2047                         continue
2048                     for aud_vid in ['audio', 'video']:
2049                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2050                             if get_no_more[aud_vid]:
2051                                 formats_info.pop(i)
2052                                 break
2053                             get_no_more[aud_vid] = True
2054
2055             if len(formats_info) == 1:
2056                 return formats_info[0]
2057
2058             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2059             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2060
2061             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2062             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2063
2064             output_ext = self.params.get('merge_output_format')
2065             if not output_ext:
2066                 if the_only_video:
2067                     output_ext = the_only_video['ext']
2068                 elif the_only_audio and not video_fmts:
2069                     output_ext = the_only_audio['ext']
2070                 else:
2071                     output_ext = 'mkv'
2072
2073             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2074
2075             new_dict = {
2076                 'requested_formats': formats_info,
2077                 'format': '+'.join(filtered('format')),
2078                 'format_id': '+'.join(filtered('format_id')),
2079                 'ext': output_ext,
2080                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2081                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2082                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2083                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2084                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2085             }
2086
2087             if the_only_video:
2088                 new_dict.update({
2089                     'width': the_only_video.get('width'),
2090                     'height': the_only_video.get('height'),
2091                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2092                     'fps': the_only_video.get('fps'),
2093                     'dynamic_range': the_only_video.get('dynamic_range'),
2094                     'vcodec': the_only_video.get('vcodec'),
2095                     'vbr': the_only_video.get('vbr'),
2096                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2097                 })
2098
2099             if the_only_audio:
2100                 new_dict.update({
2101                     'acodec': the_only_audio.get('acodec'),
2102                     'abr': the_only_audio.get('abr'),
2103                     'asr': the_only_audio.get('asr'),
2104                 })
2105
2106             return new_dict
2107
2108         def _check_formats(formats):
2109             if not check_formats:
2110                 yield from formats
2111                 return
2112             yield from self._check_formats(formats)
2113
2114         def _build_selector_function(selector):
2115             if isinstance(selector, list):  # ,
2116                 fs = [_build_selector_function(s) for s in selector]
2117
2118                 def selector_function(ctx):
2119                     for f in fs:
2120                         yield from f(ctx)
2121                 return selector_function
2122
2123             elif selector.type == GROUP:  # ()
2124                 selector_function = _build_selector_function(selector.selector)
2125
2126             elif selector.type == PICKFIRST:  # /
2127                 fs = [_build_selector_function(s) for s in selector.selector]
2128
2129                 def selector_function(ctx):
2130                     for f in fs:
2131                         picked_formats = list(f(ctx))
2132                         if picked_formats:
2133                             return picked_formats
2134                     return []
2135
2136             elif selector.type == MERGE:  # +
2137                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2138
2139                 def selector_function(ctx):
2140                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2141                         yield _merge(pair)
2142
2143             elif selector.type == SINGLE:  # atom
2144                 format_spec = selector.selector or 'best'
2145
2146                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2147                 if format_spec == 'all':
2148                     def selector_function(ctx):
2149                         yield from _check_formats(ctx['formats'][::-1])
2150                 elif format_spec == 'mergeall':
2151                     def selector_function(ctx):
2152                         formats = list(_check_formats(
2153                             f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2154                         if not formats:
2155                             return
2156                         merged_format = formats[-1]
2157                         for f in formats[-2::-1]:
2158                             merged_format = _merge((merged_format, f))
2159                         yield merged_format
2160
2161                 else:
2162                     format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
2163                     mobj = re.match(
2164                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2165                         format_spec)
2166                     if mobj is not None:
2167                         format_idx = int_or_none(mobj.group('n'), default=1)
2168                         format_reverse = mobj.group('bw')[0] == 'b'
2169                         format_type = (mobj.group('type') or [None])[0]
2170                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2171                         format_modified = mobj.group('mod') is not None
2172
2173                         format_fallback = not format_type and not format_modified  # for b, w
2174                         _filter_f = (
2175                             (lambda f: f.get('%scodec' % format_type) != 'none')
2176                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2177                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2178                             if format_type  # bv, ba, wv, wa
2179                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2180                             if not format_modified  # b, w
2181                             else lambda f: True)  # b*, w*
2182                         filter_f = lambda f: _filter_f(f) and (
2183                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2184                     else:
2185                         if format_spec in self._format_selection_exts['audio']:
2186                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2187                         elif format_spec in self._format_selection_exts['video']:
2188                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2189                             seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2190                         elif format_spec in self._format_selection_exts['storyboards']:
2191                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2192                         else:
2193                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2194
2195                     def selector_function(ctx):
2196                         formats = list(ctx['formats'])
2197                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2198                         if not matches:
2199                             if format_fallback and ctx['incomplete_formats']:
2200                                 # for extractors with incomplete formats (audio only (soundcloud)
2201                                 # or video only (imgur)) best/worst will fallback to
2202                                 # best/worst {video,audio}-only format
2203                                 matches = formats
2204                             elif seperate_fallback and not ctx['has_merged_format']:
2205                                 # for compatibility with youtube-dl when there is no pre-merged format
2206                                 matches = list(filter(seperate_fallback, formats))
2207                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2208                         try:
2209                             yield matches[format_idx - 1]
2210                         except LazyList.IndexError:
2211                             return
2212
2213             filters = [self._build_format_filter(f) for f in selector.filters]
2214
2215             def final_selector(ctx):
2216                 ctx_copy = dict(ctx)
2217                 for _filter in filters:
2218                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2219                 return selector_function(ctx_copy)
2220             return final_selector
2221
2222         stream = io.BytesIO(format_spec.encode())
2223         try:
2224             tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2225         except tokenize.TokenError:
2226             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2227
2228         class TokenIterator:
2229             def __init__(self, tokens):
2230                 self.tokens = tokens
2231                 self.counter = 0
2232
2233             def __iter__(self):
2234                 return self
2235
2236             def __next__(self):
2237                 if self.counter >= len(self.tokens):
2238                     raise StopIteration()
2239                 value = self.tokens[self.counter]
2240                 self.counter += 1
2241                 return value
2242
2243             next = __next__
2244
2245             def restore_last_token(self):
2246                 self.counter -= 1
2247
2248         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2249         return _build_selector_function(parsed_selector)
2250
2251     def _calc_headers(self, info_dict):
2252         res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2253
2254         cookies = self._calc_cookies(info_dict['url'])
2255         if cookies:
2256             res['Cookie'] = cookies
2257
2258         if 'X-Forwarded-For' not in res:
2259             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2260             if x_forwarded_for_ip:
2261                 res['X-Forwarded-For'] = x_forwarded_for_ip
2262
2263         return res
2264
2265     def _calc_cookies(self, url):
2266         pr = sanitized_Request(url)
2267         self.cookiejar.add_cookie_header(pr)
2268         return pr.get_header('Cookie')
2269
2270     def _sort_thumbnails(self, thumbnails):
2271         thumbnails.sort(key=lambda t: (
2272             t.get('preference') if t.get('preference') is not None else -1,
2273             t.get('width') if t.get('width') is not None else -1,
2274             t.get('height') if t.get('height') is not None else -1,
2275             t.get('id') if t.get('id') is not None else '',
2276             t.get('url')))
2277
2278     def _sanitize_thumbnails(self, info_dict):
2279         thumbnails = info_dict.get('thumbnails')
2280         if thumbnails is None:
2281             thumbnail = info_dict.get('thumbnail')
2282             if thumbnail:
2283                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2284         if not thumbnails:
2285             return
2286
2287         def check_thumbnails(thumbnails):
2288             for t in thumbnails:
2289                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2290                 try:
2291                     self.urlopen(HEADRequest(t['url']))
2292                 except network_exceptions as err:
2293                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2294                     continue
2295                 yield t
2296
2297         self._sort_thumbnails(thumbnails)
2298         for i, t in enumerate(thumbnails):
2299             if t.get('id') is None:
2300                 t['id'] = '%d' % i
2301             if t.get('width') and t.get('height'):
2302                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2303             t['url'] = sanitize_url(t['url'])
2304
2305         if self.params.get('check_formats') is True:
2306             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2307         else:
2308             info_dict['thumbnails'] = thumbnails
2309
2310     def _fill_common_fields(self, info_dict, is_video=True):
2311         # TODO: move sanitization here
2312         if is_video:
2313             # playlists are allowed to lack "title"
2314             title = info_dict.get('title', NO_DEFAULT)
2315             if title is NO_DEFAULT:
2316                 raise ExtractorError('Missing "title" field in extractor result',
2317                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2318             info_dict['fulltitle'] = title
2319             if not title:
2320                 if title == '':
2321                     self.write_debug('Extractor gave empty title. Creating a generic title')
2322                 else:
2323                     self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2324                 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2325
2326         if info_dict.get('duration') is not None:
2327             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2328
2329         for ts_key, date_key in (
2330                 ('timestamp', 'upload_date'),
2331                 ('release_timestamp', 'release_date'),
2332                 ('modified_timestamp', 'modified_date'),
2333         ):
2334             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2335                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2336                 # see http://bugs.python.org/issue1646728)
2337                 with contextlib.suppress(ValueError, OverflowError, OSError):
2338                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2339                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2340
2341         live_keys = ('is_live', 'was_live')
2342         live_status = info_dict.get('live_status')
2343         if live_status is None:
2344             for key in live_keys:
2345                 if info_dict.get(key) is False:
2346                     continue
2347                 if info_dict.get(key):
2348                     live_status = key
2349                 break
2350             if all(info_dict.get(key) is False for key in live_keys):
2351                 live_status = 'not_live'
2352         if live_status:
2353             info_dict['live_status'] = live_status
2354             for key in live_keys:
2355                 if info_dict.get(key) is None:
2356                     info_dict[key] = (live_status == key)
2357
2358         # Auto generate title fields corresponding to the *_number fields when missing
2359         # in order to always have clean titles. This is very common for TV series.
2360         for field in ('chapter', 'season', 'episode'):
2361             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2362                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2363
2364     def _raise_pending_errors(self, info):
2365         err = info.pop('__pending_error', None)
2366         if err:
2367             self.report_error(err, tb=False)
2368
2369     def process_video_result(self, info_dict, download=True):
2370         assert info_dict.get('_type', 'video') == 'video'
2371         self._num_videos += 1
2372
2373         if 'id' not in info_dict:
2374             raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2375         elif not info_dict.get('id'):
2376             raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2377
2378         def report_force_conversion(field, field_not, conversion):
2379             self.report_warning(
2380                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2381                 % (field, field_not, conversion))
2382
2383         def sanitize_string_field(info, string_field):
2384             field = info.get(string_field)
2385             if field is None or isinstance(field, str):
2386                 return
2387             report_force_conversion(string_field, 'a string', 'string')
2388             info[string_field] = str(field)
2389
2390         def sanitize_numeric_fields(info):
2391             for numeric_field in self._NUMERIC_FIELDS:
2392                 field = info.get(numeric_field)
2393                 if field is None or isinstance(field, (int, float)):
2394                     continue
2395                 report_force_conversion(numeric_field, 'numeric', 'int')
2396                 info[numeric_field] = int_or_none(field)
2397
2398         sanitize_string_field(info_dict, 'id')
2399         sanitize_numeric_fields(info_dict)
2400         if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2401             info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2402         if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2403             self.report_warning('"duration" field is negative, there is an error in extractor')
2404
2405         chapters = info_dict.get('chapters') or []
2406         if chapters and chapters[0].get('start_time'):
2407             chapters.insert(0, {'start_time': 0})
2408
2409         dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2410         for idx, (prev, current, next_) in enumerate(zip(
2411                 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2412             if current.get('start_time') is None:
2413                 current['start_time'] = prev.get('end_time')
2414             if not current.get('end_time'):
2415                 current['end_time'] = next_.get('start_time')
2416             if not current.get('title'):
2417                 current['title'] = f'<Untitled Chapter {idx}>'
2418
2419         if 'playlist' not in info_dict:
2420             # It isn't part of a playlist
2421             info_dict['playlist'] = None
2422             info_dict['playlist_index'] = None
2423
2424         self._sanitize_thumbnails(info_dict)
2425
2426         thumbnail = info_dict.get('thumbnail')
2427         thumbnails = info_dict.get('thumbnails')
2428         if thumbnail:
2429             info_dict['thumbnail'] = sanitize_url(thumbnail)
2430         elif thumbnails:
2431             info_dict['thumbnail'] = thumbnails[-1]['url']
2432
2433         if info_dict.get('display_id') is None and 'id' in info_dict:
2434             info_dict['display_id'] = info_dict['id']
2435
2436         self._fill_common_fields(info_dict)
2437
2438         for cc_kind in ('subtitles', 'automatic_captions'):
2439             cc = info_dict.get(cc_kind)
2440             if cc:
2441                 for _, subtitle in cc.items():
2442                     for subtitle_format in subtitle:
2443                         if subtitle_format.get('url'):
2444                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2445                         if subtitle_format.get('ext') is None:
2446                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2447
2448         automatic_captions = info_dict.get('automatic_captions')
2449         subtitles = info_dict.get('subtitles')
2450
2451         info_dict['requested_subtitles'] = self.process_subtitles(
2452             info_dict['id'], subtitles, automatic_captions)
2453
2454         if info_dict.get('formats') is None:
2455             # There's only one format available
2456             formats = [info_dict]
2457         else:
2458             formats = info_dict['formats']
2459
2460         # or None ensures --clean-infojson removes it
2461         info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
2462         if not self.params.get('allow_unplayable_formats'):
2463             formats = [f for f in formats if not f.get('has_drm')]
2464             if info_dict['_has_drm'] and all(
2465                     f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2466                 self.report_warning(
2467                     'This video is DRM protected and only images are available for download. '
2468                     'Use --list-formats to see them')
2469
2470         get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2471         if not get_from_start:
2472             info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2473         if info_dict.get('is_live') and formats:
2474             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2475             if get_from_start and not formats:
2476                 self.raise_no_formats(info_dict, msg=(
2477                     '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2478                     'If you want to download from the current time, use --no-live-from-start'))
2479
2480         if not formats:
2481             self.raise_no_formats(info_dict)
2482
2483         def is_wellformed(f):
2484             url = f.get('url')
2485             if not url:
2486                 self.report_warning(
2487                     '"url" field is missing or empty - skipping format, '
2488                     'there is an error in extractor')
2489                 return False
2490             if isinstance(url, bytes):
2491                 sanitize_string_field(f, 'url')
2492             return True
2493
2494         # Filter out malformed formats for better extraction robustness
2495         formats = list(filter(is_wellformed, formats))
2496
2497         formats_dict = {}
2498
2499         # We check that all the formats have the format and format_id fields
2500         for i, format in enumerate(formats):
2501             sanitize_string_field(format, 'format_id')
2502             sanitize_numeric_fields(format)
2503             format['url'] = sanitize_url(format['url'])
2504             if not format.get('format_id'):
2505                 format['format_id'] = str(i)
2506             else:
2507                 # Sanitize format_id from characters used in format selector expression
2508                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2509             format_id = format['format_id']
2510             if format_id not in formats_dict:
2511                 formats_dict[format_id] = []
2512             formats_dict[format_id].append(format)
2513
2514         # Make sure all formats have unique format_id
2515         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2516         for format_id, ambiguous_formats in formats_dict.items():
2517             ambigious_id = len(ambiguous_formats) > 1
2518             for i, format in enumerate(ambiguous_formats):
2519                 if ambigious_id:
2520                     format['format_id'] = '%s-%d' % (format_id, i)
2521                 if format.get('ext') is None:
2522                     format['ext'] = determine_ext(format['url']).lower()
2523                 # Ensure there is no conflict between id and ext in format selection
2524                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2525                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2526                     format['format_id'] = 'f%s' % format['format_id']
2527
2528         for i, format in enumerate(formats):
2529             if format.get('format') is None:
2530                 format['format'] = '{id} - {res}{note}'.format(
2531                     id=format['format_id'],
2532                     res=self.format_resolution(format),
2533                     note=format_field(format, 'format_note', ' (%s)'),
2534                 )
2535             if format.get('protocol') is None:
2536                 format['protocol'] = determine_protocol(format)
2537             if format.get('resolution') is None:
2538                 format['resolution'] = self.format_resolution(format, default=None)
2539             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2540                 format['dynamic_range'] = 'SDR'
2541             if (info_dict.get('duration') and format.get('tbr')
2542                     and not format.get('filesize') and not format.get('filesize_approx')):
2543                 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
2544
2545             # Add HTTP headers, so that external programs can use them from the
2546             # json output
2547             full_format_info = info_dict.copy()
2548             full_format_info.update(format)
2549             format['http_headers'] = self._calc_headers(full_format_info)
2550         # Remove private housekeeping stuff
2551         if '__x_forwarded_for_ip' in info_dict:
2552             del info_dict['__x_forwarded_for_ip']
2553
2554         if self.params.get('check_formats') is True:
2555             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2556
2557         if not formats or formats[0] is not info_dict:
2558             # only set the 'formats' fields if the original info_dict list them
2559             # otherwise we end up with a circular reference, the first (and unique)
2560             # element in the 'formats' field in info_dict is info_dict itself,
2561             # which can't be exported to json
2562             info_dict['formats'] = formats
2563
2564         info_dict, _ = self.pre_process(info_dict)
2565
2566         if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2567             return info_dict
2568
2569         self.post_extract(info_dict)
2570         info_dict, _ = self.pre_process(info_dict, 'after_filter')
2571
2572         # The pre-processors may have modified the formats
2573         formats = info_dict.get('formats', [info_dict])
2574
2575         list_only = self.params.get('simulate') is None and (
2576             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2577         interactive_format_selection = not list_only and self.format_selector == '-'
2578         if self.params.get('list_thumbnails'):
2579             self.list_thumbnails(info_dict)
2580         if self.params.get('listsubtitles'):
2581             if 'automatic_captions' in info_dict:
2582                 self.list_subtitles(
2583                     info_dict['id'], automatic_captions, 'automatic captions')
2584             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2585         if self.params.get('listformats') or interactive_format_selection:
2586             self.list_formats(info_dict)
2587         if list_only:
2588             # Without this printing, -F --print-json will not work
2589             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2590             return info_dict
2591
2592         format_selector = self.format_selector
2593         if format_selector is None:
2594             req_format = self._default_format_spec(info_dict, download=download)
2595             self.write_debug('Default format spec: %s' % req_format)
2596             format_selector = self.build_format_selector(req_format)
2597
2598         while True:
2599             if interactive_format_selection:
2600                 req_format = input(
2601                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2602                 try:
2603                     format_selector = self.build_format_selector(req_format)
2604                 except SyntaxError as err:
2605                     self.report_error(err, tb=False, is_error=False)
2606                     continue
2607
2608             formats_to_download = list(format_selector({
2609                 'formats': formats,
2610                 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2611                 'incomplete_formats': (
2612                     # All formats are video-only or
2613                     all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2614                     # all formats are audio-only
2615                     or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2616             }))
2617             if interactive_format_selection and not formats_to_download:
2618                 self.report_error('Requested format is not available', tb=False, is_error=False)
2619                 continue
2620             break
2621
2622         if not formats_to_download:
2623             if not self.params.get('ignore_no_formats_error'):
2624                 raise ExtractorError(
2625                     'Requested format is not available. Use --list-formats for a list of available formats',
2626                     expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2627             self.report_warning('Requested format is not available')
2628             # Process what we can, even without any available formats.
2629             formats_to_download = [{}]
2630
2631         requested_ranges = self.params.get('download_ranges')
2632         if requested_ranges:
2633             requested_ranges = tuple(requested_ranges(info_dict, self))
2634
2635         best_format, downloaded_formats = formats_to_download[-1], []
2636         if download:
2637             if best_format:
2638                 def to_screen(*msg):
2639                     self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2640
2641                 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2642                           (f['format_id'] for f in formats_to_download))
2643                 if requested_ranges:
2644                     to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2645                               (f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))
2646             max_downloads_reached = False
2647
2648             for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):
2649                 new_info = self._copy_infodict(info_dict)
2650                 new_info.update(fmt)
2651                 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2652                 if chapter or offset:
2653                     new_info.update({
2654                         'section_start': offset + chapter.get('start_time', 0),
2655                         'section_end': offset + min(chapter.get('end_time', duration), duration),
2656                         'section_title': chapter.get('title'),
2657                         'section_number': chapter.get('index'),
2658                     })
2659                 downloaded_formats.append(new_info)
2660                 try:
2661                     self.process_info(new_info)
2662                 except MaxDownloadsReached:
2663                     max_downloads_reached = True
2664                 self._raise_pending_errors(new_info)
2665                 # Remove copied info
2666                 for key, val in tuple(new_info.items()):
2667                     if info_dict.get(key) == val:
2668                         new_info.pop(key)
2669                 if max_downloads_reached:
2670                     break
2671
2672             write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2673             assert write_archive.issubset({True, False, 'ignore'})
2674             if True in write_archive and False not in write_archive:
2675                 self.record_download_archive(info_dict)
2676
2677             info_dict['requested_downloads'] = downloaded_formats
2678             info_dict = self.run_all_pps('after_video', info_dict)
2679             if max_downloads_reached:
2680                 raise MaxDownloadsReached()
2681
2682         # We update the info dict with the selected best quality format (backwards compatibility)
2683         info_dict.update(best_format)
2684         return info_dict
2685
2686     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2687         """Select the requested subtitles and their format"""
2688         available_subs, normal_sub_langs = {}, []
2689         if normal_subtitles and self.params.get('writesubtitles'):
2690             available_subs.update(normal_subtitles)
2691             normal_sub_langs = tuple(normal_subtitles.keys())
2692         if automatic_captions and self.params.get('writeautomaticsub'):
2693             for lang, cap_info in automatic_captions.items():
2694                 if lang not in available_subs:
2695                     available_subs[lang] = cap_info
2696
2697         if (not self.params.get('writesubtitles') and not
2698                 self.params.get('writeautomaticsub') or not
2699                 available_subs):
2700             return None
2701
2702         all_sub_langs = tuple(available_subs.keys())
2703         if self.params.get('allsubtitles', False):
2704             requested_langs = all_sub_langs
2705         elif self.params.get('subtitleslangs', False):
2706             # A list is used so that the order of languages will be the same as
2707             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2708             requested_langs = []
2709             for lang_re in self.params.get('subtitleslangs'):
2710                 discard = lang_re[0] == '-'
2711                 if discard:
2712                     lang_re = lang_re[1:]
2713                 if lang_re == 'all':
2714                     if discard:
2715                         requested_langs = []
2716                     else:
2717                         requested_langs.extend(all_sub_langs)
2718                     continue
2719                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2720                 if discard:
2721                     for lang in current_langs:
2722                         while lang in requested_langs:
2723                             requested_langs.remove(lang)
2724                 else:
2725                     requested_langs.extend(current_langs)
2726             requested_langs = orderedSet(requested_langs)
2727         elif normal_sub_langs:
2728             requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
2729         else:
2730             requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
2731         if requested_langs:
2732             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2733
2734         formats_query = self.params.get('subtitlesformat', 'best')
2735         formats_preference = formats_query.split('/') if formats_query else []
2736         subs = {}
2737         for lang in requested_langs:
2738             formats = available_subs.get(lang)
2739             if formats is None:
2740                 self.report_warning(f'{lang} subtitles not available for {video_id}')
2741                 continue
2742             for ext in formats_preference:
2743                 if ext == 'best':
2744                     f = formats[-1]
2745                     break
2746                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2747                 if matches:
2748                     f = matches[-1]
2749                     break
2750             else:
2751                 f = formats[-1]
2752                 self.report_warning(
2753                     'No subtitle format found matching "%s" for language %s, '
2754                     'using %s' % (formats_query, lang, f['ext']))
2755             subs[lang] = f
2756         return subs
2757
2758     def _forceprint(self, key, info_dict):
2759         if info_dict is None:
2760             return
2761         info_copy = info_dict.copy()
2762         info_copy['formats_table'] = self.render_formats_table(info_dict)
2763         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2764         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2765         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2766
2767         def format_tmpl(tmpl):
2768             mobj = re.match(r'\w+(=?)$', tmpl)
2769             if mobj and mobj.group(1):
2770                 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2771             elif mobj:
2772                 return f'%({tmpl})s'
2773             return tmpl
2774
2775         for tmpl in self.params['forceprint'].get(key, []):
2776             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2777
2778         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2779             filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
2780             tmpl = format_tmpl(tmpl)
2781             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2782             if self._ensure_dir_exists(filename):
2783                 with open(filename, 'a', encoding='utf-8') as f:
2784                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2785
2786     def __forced_printings(self, info_dict, filename, incomplete):
2787         def print_mandatory(field, actual_field=None):
2788             if actual_field is None:
2789                 actual_field = field
2790             if (self.params.get('force%s' % field, False)
2791                     and (not incomplete or info_dict.get(actual_field) is not None)):
2792                 self.to_stdout(info_dict[actual_field])
2793
2794         def print_optional(field):
2795             if (self.params.get('force%s' % field, False)
2796                     and info_dict.get(field) is not None):
2797                 self.to_stdout(info_dict[field])
2798
2799         info_dict = info_dict.copy()
2800         if filename is not None:
2801             info_dict['filename'] = filename
2802         if info_dict.get('requested_formats') is not None:
2803             # For RTMP URLs, also include the playpath
2804             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2805         elif info_dict.get('url'):
2806             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2807
2808         if (self.params.get('forcejson')
2809                 or self.params['forceprint'].get('video')
2810                 or self.params['print_to_file'].get('video')):
2811             self.post_extract(info_dict)
2812         self._forceprint('video', info_dict)
2813
2814         print_mandatory('title')
2815         print_mandatory('id')
2816         print_mandatory('url', 'urls')
2817         print_optional('thumbnail')
2818         print_optional('description')
2819         print_optional('filename')
2820         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2821             self.to_stdout(formatSeconds(info_dict['duration']))
2822         print_mandatory('format')
2823
2824         if self.params.get('forcejson'):
2825             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2826
2827     def dl(self, name, info, subtitle=False, test=False):
2828         if not info.get('url'):
2829             self.raise_no_formats(info, True)
2830
2831         if test:
2832             verbose = self.params.get('verbose')
2833             params = {
2834                 'test': True,
2835                 'quiet': self.params.get('quiet') or not verbose,
2836                 'verbose': verbose,
2837                 'noprogress': not verbose,
2838                 'nopart': True,
2839                 'skip_unavailable_fragments': False,
2840                 'keep_fragments': False,
2841                 'overwrites': True,
2842                 '_no_ytdl_file': True,
2843             }
2844         else:
2845             params = self.params
2846         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2847         if not test:
2848             for ph in self._progress_hooks:
2849                 fd.add_progress_hook(ph)
2850             urls = '", "'.join(
2851                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2852                 for f in info.get('requested_formats', []) or [info])
2853             self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
2854
2855         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2856         # But it may contain objects that are not deep-copyable
2857         new_info = self._copy_infodict(info)
2858         if new_info.get('http_headers') is None:
2859             new_info['http_headers'] = self._calc_headers(new_info)
2860         return fd.download(name, new_info, subtitle)
2861
2862     def existing_file(self, filepaths, *, default_overwrite=True):
2863         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2864         if existing_files and not self.params.get('overwrites', default_overwrite):
2865             return existing_files[0]
2866
2867         for file in existing_files:
2868             self.report_file_delete(file)
2869             os.remove(file)
2870         return None
2871
2872     def process_info(self, info_dict):
2873         """Process a single resolved IE result. (Modifies it in-place)"""
2874
2875         assert info_dict.get('_type', 'video') == 'video'
2876         original_infodict = info_dict
2877
2878         if 'format' not in info_dict and 'ext' in info_dict:
2879             info_dict['format'] = info_dict['ext']
2880
2881         # This is mostly just for backward compatibility of process_info
2882         # As a side-effect, this allows for format-specific filters
2883         if self._match_entry(info_dict) is not None:
2884             info_dict['__write_download_archive'] = 'ignore'
2885             return
2886
2887         # Does nothing under normal operation - for backward compatibility of process_info
2888         self.post_extract(info_dict)
2889         self._num_downloads += 1
2890
2891         # info_dict['_filename'] needs to be set for backward compatibility
2892         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2893         temp_filename = self.prepare_filename(info_dict, 'temp')
2894         files_to_move = {}
2895
2896         # Forced printings
2897         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2898
2899         def check_max_downloads():
2900             if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
2901                 raise MaxDownloadsReached()
2902
2903         if self.params.get('simulate'):
2904             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2905             check_max_downloads()
2906             return
2907
2908         if full_filename is None:
2909             return
2910         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2911             return
2912         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2913             return
2914
2915         if self._write_description('video', info_dict,
2916                                    self.prepare_filename(info_dict, 'description')) is None:
2917             return
2918
2919         sub_files = self._write_subtitles(info_dict, temp_filename)
2920         if sub_files is None:
2921             return
2922         files_to_move.update(dict(sub_files))
2923
2924         thumb_files = self._write_thumbnails(
2925             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2926         if thumb_files is None:
2927             return
2928         files_to_move.update(dict(thumb_files))
2929
2930         infofn = self.prepare_filename(info_dict, 'infojson')
2931         _infojson_written = self._write_info_json('video', info_dict, infofn)
2932         if _infojson_written:
2933             info_dict['infojson_filename'] = infofn
2934             # For backward compatibility, even though it was a private field
2935             info_dict['__infojson_filename'] = infofn
2936         elif _infojson_written is None:
2937             return
2938
2939         # Note: Annotations are deprecated
2940         annofn = None
2941         if self.params.get('writeannotations', False):
2942             annofn = self.prepare_filename(info_dict, 'annotation')
2943         if annofn:
2944             if not self._ensure_dir_exists(encodeFilename(annofn)):
2945                 return
2946             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2947                 self.to_screen('[info] Video annotations are already present')
2948             elif not info_dict.get('annotations'):
2949                 self.report_warning('There are no annotations to write.')
2950             else:
2951                 try:
2952                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2953                     with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2954                         annofile.write(info_dict['annotations'])
2955                 except (KeyError, TypeError):
2956                     self.report_warning('There are no annotations to write.')
2957                 except OSError:
2958                     self.report_error('Cannot write annotations file: ' + annofn)
2959                     return
2960
2961         # Write internet shortcut files
2962         def _write_link_file(link_type):
2963             url = try_get(info_dict['webpage_url'], iri_to_uri)
2964             if not url:
2965                 self.report_warning(
2966                     f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2967                 return True
2968             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2969             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2970                 return False
2971             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2972                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2973                 return True
2974             try:
2975                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2976                 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2977                           newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2978                     template_vars = {'url': url}
2979                     if link_type == 'desktop':
2980                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2981                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2982             except OSError:
2983                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2984                 return False
2985             return True
2986
2987         write_links = {
2988             'url': self.params.get('writeurllink'),
2989             'webloc': self.params.get('writewebloclink'),
2990             'desktop': self.params.get('writedesktoplink'),
2991         }
2992         if self.params.get('writelink'):
2993             link_type = ('webloc' if sys.platform == 'darwin'
2994                          else 'desktop' if sys.platform.startswith('linux')
2995                          else 'url')
2996             write_links[link_type] = True
2997
2998         if any(should_write and not _write_link_file(link_type)
2999                for link_type, should_write in write_links.items()):
3000             return
3001
3002         def replace_info_dict(new_info):
3003             nonlocal info_dict
3004             if new_info == info_dict:
3005                 return
3006             info_dict.clear()
3007             info_dict.update(new_info)
3008
3009         new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3010         replace_info_dict(new_info)
3011
3012         if self.params.get('skip_download'):
3013             info_dict['filepath'] = temp_filename
3014             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3015             info_dict['__files_to_move'] = files_to_move
3016             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3017             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3018         else:
3019             # Download
3020             info_dict.setdefault('__postprocessors', [])
3021             try:
3022
3023                 def existing_video_file(*filepaths):
3024                     ext = info_dict.get('ext')
3025                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3026                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3027                                               default_overwrite=False)
3028                     if file:
3029                         info_dict['ext'] = os.path.splitext(file)[1][1:]
3030                     return file
3031
3032                 fd, success = None, True
3033                 if info_dict.get('protocol') or info_dict.get('url'):
3034                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3035                     if fd is not FFmpegFD and (
3036                             info_dict.get('section_start') or info_dict.get('section_end')):
3037                         msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3038                                else 'You have requested downloading the video partially, but ffmpeg is not installed')
3039                         self.report_error(f'{msg}. Aborting')
3040                         return
3041
3042                 if info_dict.get('requested_formats') is not None:
3043
3044                     def compatible_formats(formats):
3045                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3046                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
3047                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
3048                         if len(video_formats) > 2 or len(audio_formats) > 2:
3049                             return False
3050
3051                         # Check extension
3052                         exts = {format.get('ext') for format in formats}
3053                         COMPATIBLE_EXTS = (
3054                             {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
3055                             {'webm'},
3056                         )
3057                         for ext_sets in COMPATIBLE_EXTS:
3058                             if ext_sets.issuperset(exts):
3059                                 return True
3060                         # TODO: Check acodec/vcodec
3061                         return False
3062
3063                     requested_formats = info_dict['requested_formats']
3064                     old_ext = info_dict['ext']
3065                     if self.params.get('merge_output_format') is None:
3066                         if not compatible_formats(requested_formats):
3067                             info_dict['ext'] = 'mkv'
3068                             self.report_warning(
3069                                 'Requested formats are incompatible for merge and will be merged into mkv')
3070                         if (info_dict['ext'] == 'webm'
3071                                 and info_dict.get('thumbnails')
3072                                 # check with type instead of pp_key, __name__, or isinstance
3073                                 # since we dont want any custom PPs to trigger this
3074                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
3075                             info_dict['ext'] = 'mkv'
3076                             self.report_warning(
3077                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3078                     new_ext = info_dict['ext']
3079
3080                     def correct_ext(filename, ext=new_ext):
3081                         if filename == '-':
3082                             return filename
3083                         filename_real_ext = os.path.splitext(filename)[1][1:]
3084                         filename_wo_ext = (
3085                             os.path.splitext(filename)[0]
3086                             if filename_real_ext in (old_ext, new_ext)
3087                             else filename)
3088                         return f'{filename_wo_ext}.{ext}'
3089
3090                     # Ensure filename always has a correct extension for successful merge
3091                     full_filename = correct_ext(full_filename)
3092                     temp_filename = correct_ext(temp_filename)
3093                     dl_filename = existing_video_file(full_filename, temp_filename)
3094                     info_dict['__real_download'] = False
3095
3096                     merger = FFmpegMergerPP(self)
3097                     downloaded = []
3098                     if dl_filename is not None:
3099                         self.report_file_already_downloaded(dl_filename)
3100                     elif fd:
3101                         for f in requested_formats if fd != FFmpegFD else []:
3102                             f['filepath'] = fname = prepend_extension(
3103                                 correct_ext(temp_filename, info_dict['ext']),
3104                                 'f%s' % f['format_id'], info_dict['ext'])
3105                             downloaded.append(fname)
3106                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3107                         success, real_download = self.dl(temp_filename, info_dict)
3108                         info_dict['__real_download'] = real_download
3109                     else:
3110                         if self.params.get('allow_unplayable_formats'):
3111                             self.report_warning(
3112                                 'You have requested merging of multiple formats '
3113                                 'while also allowing unplayable formats to be downloaded. '
3114                                 'The formats won\'t be merged to prevent data corruption.')
3115                         elif not merger.available:
3116                             msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3117                             if not self.params.get('ignoreerrors'):
3118                                 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3119                                 return
3120                             self.report_warning(f'{msg}. The formats won\'t be merged')
3121
3122                         if temp_filename == '-':
3123                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3124                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3125                                       else 'but ffmpeg is not installed')
3126                             self.report_warning(
3127                                 f'You have requested downloading multiple formats to stdout {reason}. '
3128                                 'The formats will be streamed one after the other')
3129                             fname = temp_filename
3130                         for f in requested_formats:
3131                             new_info = dict(info_dict)
3132                             del new_info['requested_formats']
3133                             new_info.update(f)
3134                             if temp_filename != '-':
3135                                 fname = prepend_extension(
3136                                     correct_ext(temp_filename, new_info['ext']),
3137                                     'f%s' % f['format_id'], new_info['ext'])
3138                                 if not self._ensure_dir_exists(fname):
3139                                     return
3140                                 f['filepath'] = fname
3141                                 downloaded.append(fname)
3142                             partial_success, real_download = self.dl(fname, new_info)
3143                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3144                             success = success and partial_success
3145
3146                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3147                         info_dict['__postprocessors'].append(merger)
3148                         info_dict['__files_to_merge'] = downloaded
3149                         # Even if there were no downloads, it is being merged only now
3150                         info_dict['__real_download'] = True
3151                     else:
3152                         for file in downloaded:
3153                             files_to_move[file] = None
3154                 else:
3155                     # Just a single file
3156                     dl_filename = existing_video_file(full_filename, temp_filename)
3157                     if dl_filename is None or dl_filename == temp_filename:
3158                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3159                         # So we should try to resume the download
3160                         success, real_download = self.dl(temp_filename, info_dict)
3161                         info_dict['__real_download'] = real_download
3162                     else:
3163                         self.report_file_already_downloaded(dl_filename)
3164
3165                 dl_filename = dl_filename or temp_filename
3166                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3167
3168             except network_exceptions as err:
3169                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3170                 return
3171             except OSError as err:
3172                 raise UnavailableVideoError(err)
3173             except (ContentTooShortError, ) as err:
3174                 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3175                 return
3176
3177             self._raise_pending_errors(info_dict)
3178             if success and full_filename != '-':
3179
3180                 def fixup():
3181                     do_fixup = True
3182                     fixup_policy = self.params.get('fixup')
3183                     vid = info_dict['id']
3184
3185                     if fixup_policy in ('ignore', 'never'):
3186                         return
3187                     elif fixup_policy == 'warn':
3188                         do_fixup = 'warn'
3189                     elif fixup_policy != 'force':
3190                         assert fixup_policy in ('detect_or_warn', None)
3191                         if not info_dict.get('__real_download'):
3192                             do_fixup = False
3193
3194                     def ffmpeg_fixup(cndn, msg, cls):
3195                         if not (do_fixup and cndn):
3196                             return
3197                         elif do_fixup == 'warn':
3198                             self.report_warning(f'{vid}: {msg}')
3199                             return
3200                         pp = cls(self)
3201                         if pp.available:
3202                             info_dict['__postprocessors'].append(pp)
3203                         else:
3204                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3205
3206                     stretched_ratio = info_dict.get('stretched_ratio')
3207                     ffmpeg_fixup(stretched_ratio not in (1, None),
3208                                  f'Non-uniform pixel ratio {stretched_ratio}',
3209                                  FFmpegFixupStretchedPP)
3210
3211                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3212                     downloader = downloader.FD_NAME if downloader else None
3213
3214                     ext = info_dict.get('ext')
3215                     postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3216                         isinstance(pp, FFmpegVideoConvertorPP)
3217                         and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3218                     ) for pp in self._pps['post_process'])
3219
3220                     if not postprocessed_by_ffmpeg:
3221                         ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
3222                                      'writing DASH m4a. Only some players support this container',
3223                                      FFmpegFixupM4aPP)
3224                         ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3225                                      or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3226                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3227                                      FFmpegFixupM3u8PP)
3228                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3229                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3230
3231                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3232                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3233
3234                 fixup()
3235                 try:
3236                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3237                 except PostProcessingError as err:
3238                     self.report_error('Postprocessing: %s' % str(err))
3239                     return
3240                 try:
3241                     for ph in self._post_hooks:
3242                         ph(info_dict['filepath'])
3243                 except Exception as err:
3244                     self.report_error('post hooks: %s' % str(err))
3245                     return
3246                 info_dict['__write_download_archive'] = True
3247
3248         assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
3249         if self.params.get('force_write_download_archive'):
3250             info_dict['__write_download_archive'] = True
3251         check_max_downloads()
3252
3253     def __download_wrapper(self, func):
3254         @functools.wraps(func)
3255         def wrapper(*args, **kwargs):
3256             try:
3257                 res = func(*args, **kwargs)
3258             except UnavailableVideoError as e:
3259                 self.report_error(e)
3260             except DownloadCancelled as e:
3261                 self.to_screen(f'[info] {e}')
3262                 if not self.params.get('break_per_url'):
3263                     raise
3264             else:
3265                 if self.params.get('dump_single_json', False):
3266                     self.post_extract(res)
3267                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3268         return wrapper
3269
3270     def download(self, url_list):
3271         """Download a given list of URLs."""
3272         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3273         outtmpl = self.params['outtmpl']['default']
3274         if (len(url_list) > 1
3275                 and outtmpl != '-'
3276                 and '%' not in outtmpl
3277                 and self.params.get('max_downloads') != 1):
3278             raise SameFileError(outtmpl)
3279
3280         for url in url_list:
3281             self.__download_wrapper(self.extract_info)(
3282                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3283
3284         return self._download_retcode
3285
3286     def download_with_info_file(self, info_filename):
3287         with contextlib.closing(fileinput.FileInput(
3288                 [info_filename], mode='r',
3289                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3290             # FileInput doesn't have a read method, we can't call json.load
3291             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3292         try:
3293             self.__download_wrapper(self.process_ie_result)(info, download=True)
3294         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3295             if not isinstance(e, EntryNotInPlaylist):
3296                 self.to_stderr('\r')
3297             webpage_url = info.get('webpage_url')
3298             if webpage_url is not None:
3299                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3300                 return self.download([webpage_url])
3301             else:
3302                 raise
3303         return self._download_retcode
3304
3305     @staticmethod
3306     def sanitize_info(info_dict, remove_private_keys=False):
3307         ''' Sanitize the infodict for converting to json '''
3308         if info_dict is None:
3309             return info_dict
3310         info_dict.setdefault('epoch', int(time.time()))
3311         info_dict.setdefault('_type', 'video')
3312
3313         if remove_private_keys:
3314             reject = lambda k, v: v is None or k.startswith('__') or k in {
3315                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3316                 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
3317             }
3318         else:
3319             reject = lambda k, v: False
3320
3321         def filter_fn(obj):
3322             if isinstance(obj, dict):
3323                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3324             elif isinstance(obj, (list, tuple, set, LazyList)):
3325                 return list(map(filter_fn, obj))
3326             elif obj is None or isinstance(obj, (str, int, float, bool)):
3327                 return obj
3328             else:
3329                 return repr(obj)
3330
3331         return filter_fn(info_dict)
3332
3333     @staticmethod
3334     def filter_requested_info(info_dict, actually_filter=True):
3335         ''' Alias of sanitize_info for backward compatibility '''
3336         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3337
3338     def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3339         for filename in set(filter(None, files_to_delete)):
3340             if msg:
3341                 self.to_screen(msg % filename)
3342             try:
3343                 os.remove(filename)
3344             except OSError:
3345                 self.report_warning(f'Unable to delete file {filename}')
3346             if filename in info.get('__files_to_move', []):  # NB: Delete even if None
3347                 del info['__files_to_move'][filename]
3348
3349     @staticmethod
3350     def post_extract(info_dict):
3351         def actual_post_extract(info_dict):
3352             if info_dict.get('_type') in ('playlist', 'multi_video'):
3353                 for video_dict in info_dict.get('entries', {}):
3354                     actual_post_extract(video_dict or {})
3355                 return
3356
3357             post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3358             info_dict.update(post_extractor())
3359
3360         actual_post_extract(info_dict or {})
3361
3362     def run_pp(self, pp, infodict):
3363         files_to_delete = []
3364         if '__files_to_move' not in infodict:
3365             infodict['__files_to_move'] = {}
3366         try:
3367             files_to_delete, infodict = pp.run(infodict)
3368         except PostProcessingError as e:
3369             # Must be True and not 'only_download'
3370             if self.params.get('ignoreerrors') is True:
3371                 self.report_error(e)
3372                 return infodict
3373             raise
3374
3375         if not files_to_delete:
3376             return infodict
3377         if self.params.get('keepvideo', False):
3378             for f in files_to_delete:
3379                 infodict['__files_to_move'].setdefault(f, '')
3380         else:
3381             self._delete_downloaded_files(
3382                 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3383         return infodict
3384
3385     def run_all_pps(self, key, info, *, additional_pps=None):
3386         self._forceprint(key, info)
3387         for pp in (additional_pps or []) + self._pps[key]:
3388             info = self.run_pp(pp, info)
3389         return info
3390
3391     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3392         info = dict(ie_info)
3393         info['__files_to_move'] = files_to_move or {}
3394         try:
3395             info = self.run_all_pps(key, info)
3396         except PostProcessingError as err:
3397             msg = f'Preprocessing: {err}'
3398             info.setdefault('__pending_error', msg)
3399             self.report_error(msg, is_error=False)
3400         return info, info.pop('__files_to_move', None)
3401
3402     def post_process(self, filename, info, files_to_move=None):
3403         """Run all the postprocessors on the given file."""
3404         info['filepath'] = filename
3405         info['__files_to_move'] = files_to_move or {}
3406         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3407         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3408         del info['__files_to_move']
3409         return self.run_all_pps('after_move', info)
3410
3411     def _make_archive_id(self, info_dict):
3412         video_id = info_dict.get('id')
3413         if not video_id:
3414             return
3415         # Future-proof against any change in case
3416         # and backwards compatibility with prior versions
3417         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3418         if extractor is None:
3419             url = str_or_none(info_dict.get('url'))
3420             if not url:
3421                 return
3422             # Try to find matching extractor for the URL and take its ie_key
3423             for ie_key, ie in self._ies.items():
3424                 if ie.suitable(url):
3425                     extractor = ie_key
3426                     break
3427             else:
3428                 return
3429         return f'{extractor.lower()} {video_id}'
3430
3431     def in_download_archive(self, info_dict):
3432         fn = self.params.get('download_archive')
3433         if fn is None:
3434             return False
3435
3436         vid_id = self._make_archive_id(info_dict)
3437         if not vid_id:
3438             return False  # Incomplete video information
3439
3440         return vid_id in self.archive
3441
3442     def record_download_archive(self, info_dict):
3443         fn = self.params.get('download_archive')
3444         if fn is None:
3445             return
3446         vid_id = self._make_archive_id(info_dict)
3447         assert vid_id
3448         self.write_debug(f'Adding to archive: {vid_id}')
3449         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3450             archive_file.write(vid_id + '\n')
3451         self.archive.add(vid_id)
3452
3453     @staticmethod
3454     def format_resolution(format, default='unknown'):
3455         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3456             return 'audio only'
3457         if format.get('resolution') is not None:
3458             return format['resolution']
3459         if format.get('width') and format.get('height'):
3460             return '%dx%d' % (format['width'], format['height'])
3461         elif format.get('height'):
3462             return '%sp' % format['height']
3463         elif format.get('width'):
3464             return '%dx?' % format['width']
3465         return default
3466
3467     def _list_format_headers(self, *headers):
3468         if self.params.get('listformats_table', True) is not False:
3469             return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3470         return headers
3471
3472     def _format_note(self, fdict):
3473         res = ''
3474         if fdict.get('ext') in ['f4f', 'f4m']:
3475             res += '(unsupported)'
3476         if fdict.get('language'):
3477             if res:
3478                 res += ' '
3479             res += '[%s]' % fdict['language']
3480         if fdict.get('format_note') is not None:
3481             if res:
3482                 res += ' '
3483             res += fdict['format_note']
3484         if fdict.get('tbr') is not None:
3485             if res:
3486                 res += ', '
3487             res += '%4dk' % fdict['tbr']
3488         if fdict.get('container') is not None:
3489             if res:
3490                 res += ', '
3491             res += '%s container' % fdict['container']
3492         if (fdict.get('vcodec') is not None
3493                 and fdict.get('vcodec') != 'none'):
3494             if res:
3495                 res += ', '
3496             res += fdict['vcodec']
3497             if fdict.get('vbr') is not None:
3498                 res += '@'
3499         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3500             res += 'video@'
3501         if fdict.get('vbr') is not None:
3502             res += '%4dk' % fdict['vbr']
3503         if fdict.get('fps') is not None:
3504             if res:
3505                 res += ', '
3506             res += '%sfps' % fdict['fps']
3507         if fdict.get('acodec') is not None:
3508             if res:
3509                 res += ', '
3510             if fdict['acodec'] == 'none':
3511                 res += 'video only'
3512             else:
3513                 res += '%-5s' % fdict['acodec']
3514         elif fdict.get('abr') is not None:
3515             if res:
3516                 res += ', '
3517             res += 'audio'
3518         if fdict.get('abr') is not None:
3519             res += '@%3dk' % fdict['abr']
3520         if fdict.get('asr') is not None:
3521             res += ' (%5dHz)' % fdict['asr']
3522         if fdict.get('filesize') is not None:
3523             if res:
3524                 res += ', '
3525             res += format_bytes(fdict['filesize'])
3526         elif fdict.get('filesize_approx') is not None:
3527             if res:
3528                 res += ', '
3529             res += '~' + format_bytes(fdict['filesize_approx'])
3530         return res
3531
3532     def render_formats_table(self, info_dict):
3533         if not info_dict.get('formats') and not info_dict.get('url'):
3534             return None
3535
3536         formats = info_dict.get('formats', [info_dict])
3537         if not self.params.get('listformats_table', True) is not False:
3538             table = [
3539                 [
3540                     format_field(f, 'format_id'),
3541                     format_field(f, 'ext'),
3542                     self.format_resolution(f),
3543                     self._format_note(f)
3544                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3545             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3546
3547         def simplified_codec(f, field):
3548             assert field in ('acodec', 'vcodec')
3549             codec = f.get(field, 'unknown')
3550             if not codec:
3551                 return 'unknown'
3552             elif codec != 'none':
3553                 return '.'.join(codec.split('.')[:4])
3554
3555             if field == 'vcodec' and f.get('acodec') == 'none':
3556                 return 'images'
3557             elif field == 'acodec' and f.get('vcodec') == 'none':
3558                 return ''
3559             return self._format_out('audio only' if field == 'vcodec' else 'video only',
3560                                     self.Styles.SUPPRESS)
3561
3562         delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3563         table = [
3564             [
3565                 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3566                 format_field(f, 'ext'),
3567                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3568                 format_field(f, 'fps', '\t%d', func=round),
3569                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3570                 delim,
3571                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3572                 format_field(f, 'tbr', '\t%dk', func=round),
3573                 shorten_protocol_name(f.get('protocol', '')),
3574                 delim,
3575                 simplified_codec(f, 'vcodec'),
3576                 format_field(f, 'vbr', '\t%dk', func=round),
3577                 simplified_codec(f, 'acodec'),
3578                 format_field(f, 'abr', '\t%dk', func=round),
3579                 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3580                 join_nonempty(
3581                     self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3582                     format_field(f, 'language', '[%s]'),
3583                     join_nonempty(format_field(f, 'format_note'),
3584                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3585                                   delim=', '),
3586                     delim=' '),
3587             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3588         header_line = self._list_format_headers(
3589             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3590             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3591
3592         return render_table(
3593             header_line, table, hide_empty=True,
3594             delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3595
3596     def render_thumbnails_table(self, info_dict):
3597         thumbnails = list(info_dict.get('thumbnails') or [])
3598         if not thumbnails:
3599             return None
3600         return render_table(
3601             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3602             [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3603
3604     def render_subtitles_table(self, video_id, subtitles):
3605         def _row(lang, formats):
3606             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3607             if len(set(names)) == 1:
3608                 names = [] if names[0] == 'unknown' else names[:1]
3609             return [lang, ', '.join(names), ', '.join(exts)]
3610
3611         if not subtitles:
3612             return None
3613         return render_table(
3614             self._list_format_headers('Language', 'Name', 'Formats'),
3615             [_row(lang, formats) for lang, formats in subtitles.items()],
3616             hide_empty=True)
3617
3618     def __list_table(self, video_id, name, func, *args):
3619         table = func(*args)
3620         if not table:
3621             self.to_screen(f'{video_id} has no {name}')
3622             return
3623         self.to_screen(f'[info] Available {name} for {video_id}:')
3624         self.to_stdout(table)
3625
3626     def list_formats(self, info_dict):
3627         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3628
3629     def list_thumbnails(self, info_dict):
3630         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3631
3632     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3633         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3634
3635     def urlopen(self, req):
3636         """ Start an HTTP download """
3637         if isinstance(req, str):
3638             req = sanitized_Request(req)
3639         return self._opener.open(req, timeout=self._socket_timeout)
3640
    def print_debug_header(self):
        """
        Write the debug header: encodings, yt-dlp version, plugins, compat options,
        system identifier, executable versions, optional libraries and proxy map.

        Does nothing unless the 'verbose' param is set. If the 'logger' param is set,
        every line is passed to its debug(); otherwise the lines are written directly
        """
        if not self.params.get('verbose'):
            return

        # These imports can be slow. So import them only as needed
        from .extractor.extractors import _LAZY_LOADER
        from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors

        def get_encoding(stream):
            # The stream's encoding, or 'missing (<type name>)' if it has no such attribute
            ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
            if not supports_terminal_sequences(stream):
                from .utils import WINDOWS_VT_MODE  # Must be imported locally
                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
            return ret

        # One entry per output file that is set, except the one keyed 'console'
        encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            self.get_encoding(),
            ', '.join(
                f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
                if stream is not None and key != 'console')
        )

        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            # The encodings line is written with write_string(..., encoding=None);
            # all later lines go through self._write_string
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        # Empty parts (no git head, unknown variant) are left out of the version line
        write_debug(join_nonempty(
            'yt-dlp version', __version__,
            f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            delim=' '))
        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if plugin_extractors or plugin_postprocessors:
            # A plugin registered under a name other than its class name is shown as "<class> as <name>"
            write_debug('Plugins: %s' % [
                '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
        if self.params['compat_opts']:
            write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

        if source == 'source':
            # Best effort: report the git HEAD of the directory containing this file.
            # Any failure (e.g. git not available) is silently ignored
            try:
                stdout, _, _ = Popen.run(
                    ['git', 'rev-parse', '--short', 'HEAD'],
                    text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                if re.fullmatch('[0-9a-f]+', stdout.strip()):
                    write_debug(f'Git HEAD: {stdout.strip()}')
            except Exception:
                # sys.exc_clear only existed in Python 2; the failed lookup on Python 3 is suppressed
                with contextlib.suppress(Exception):
                    sys.exc_clear()

        write_debug(system_identifier())

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        # Keep only the names of the features whose value is truthy
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        # Executables with a falsy version are omitted from the list
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .compat.compat_utils import get_package_info
        from .dependencies import available_dependencies

        # The set removes duplicate descriptions before sorting
        write_debug('Optional libraries: %s' % (', '.join(sorted({
            join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
        })) or 'none'))

        # Make sure the opener exists, then collect the proxies of every handler that has any
        self._setup_opener()
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        # Not implemented
        # ("False and ..." short-circuits, so the block below never runs)
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode()
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3742
3743     def _setup_opener(self):
3744         if hasattr(self, '_opener'):
3745             return
3746         timeout_val = self.params.get('socket_timeout')
3747         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3748
3749         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3750         opts_cookiefile = self.params.get('cookiefile')
3751         opts_proxy = self.params.get('proxy')
3752
3753         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3754
3755         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3756         if opts_proxy is not None:
3757             if opts_proxy == '':
3758                 proxies = {}
3759             else:
3760                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3761         else:
3762             proxies = urllib.request.getproxies()
3763             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3764             if 'http' in proxies and 'https' not in proxies:
3765                 proxies['https'] = proxies['http']
3766         proxy_handler = PerRequestProxyHandler(proxies)
3767
3768         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3769         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3770         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3771         redirect_handler = YoutubeDLRedirectHandler()
3772         data_handler = urllib.request.DataHandler()
3773
3774         # When passing our own FileHandler instance, build_opener won't add the
3775         # default FileHandler and allows us to disable the file protocol, which
3776         # can be used for malicious purposes (see
3777         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3778         file_handler = urllib.request.FileHandler()
3779
3780         def file_open(*args, **kwargs):
3781             raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3782         file_handler.file_open = file_open
3783
3784         opener = urllib.request.build_opener(
3785             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3786
3787         # Delete the default user-agent header, which would otherwise apply in
3788         # cases where our custom HTTP handler doesn't come into play
3789         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3790         opener.addheaders = []
3791         self._opener = opener
3792
3793     def encode(self, s):
3794         if isinstance(s, bytes):
3795             return s  # Already encoded
3796
3797         try:
3798             return s.encode(self.get_encoding())
3799         except UnicodeEncodeError as err:
3800             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3801             raise
3802
3803     def get_encoding(self):
3804         encoding = self.params.get('encoding')
3805         if encoding is None:
3806             encoding = preferredencoding()
3807         return encoding
3808
3809     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3810         ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
3811         if overwrite is None:
3812             overwrite = self.params.get('overwrites', True)
3813         if not self.params.get('writeinfojson'):
3814             return False
3815         elif not infofn:
3816             self.write_debug(f'Skipping writing {label} infojson')
3817             return False
3818         elif not self._ensure_dir_exists(infofn):
3819             return None
3820         elif not overwrite and os.path.exists(infofn):
3821             self.to_screen(f'[info] {label.title()} metadata is already present')
3822             return 'exists'
3823
3824         self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3825         try:
3826             write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3827             return True
3828         except OSError:
3829             self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3830             return None
3831
3832     def _write_description(self, label, ie_result, descfn):
3833         ''' Write description and returns True = written, False = skip, None = error '''
3834         if not self.params.get('writedescription'):
3835             return False
3836         elif not descfn:
3837             self.write_debug(f'Skipping writing {label} description')
3838             return False
3839         elif not self._ensure_dir_exists(descfn):
3840             return None
3841         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3842             self.to_screen(f'[info] {label.title()} description is already present')
3843         elif ie_result.get('description') is None:
3844             self.report_warning(f'There\'s no {label} description to write')
3845             return False
3846         else:
3847             try:
3848                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3849                 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3850                     descfile.write(ie_result['description'])
3851             except OSError:
3852                 self.report_error(f'Cannot write {label} description file {descfn}')
3853                 return None
3854         return True
3855
3856     def _write_subtitles(self, info_dict, filename):
3857         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3858         ret = []
3859         subtitles = info_dict.get('requested_subtitles')
3860         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3861             # subtitles download errors are already managed as troubles in relevant IE
3862             # that way it will silently go on when used with unsupporting IE
3863             return ret
3864
3865         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3866         if not sub_filename_base:
3867             self.to_screen('[info] Skipping writing video subtitles')
3868             return ret
3869         for sub_lang, sub_info in subtitles.items():
3870             sub_format = sub_info['ext']
3871             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3872             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3873             existing_sub = self.existing_file((sub_filename_final, sub_filename))
3874             if existing_sub:
3875                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3876                 sub_info['filepath'] = existing_sub
3877                 ret.append((existing_sub, sub_filename_final))
3878                 continue
3879
3880             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3881             if sub_info.get('data') is not None:
3882                 try:
3883                     # Use newline='' to prevent conversion of newline characters
3884                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3885                     with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3886                         subfile.write(sub_info['data'])
3887                     sub_info['filepath'] = sub_filename
3888                     ret.append((sub_filename, sub_filename_final))
3889                     continue
3890                 except OSError:
3891                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3892                     return None
3893
3894             try:
3895                 sub_copy = sub_info.copy()
3896                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3897                 self.dl(sub_filename, sub_copy, subtitle=True)
3898                 sub_info['filepath'] = sub_filename
3899                 ret.append((sub_filename, sub_filename_final))
3900             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3901                 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
3902                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3903                     if not self.params.get('ignoreerrors'):
3904                         self.report_error(msg)
3905                     raise DownloadError(msg)
3906                 self.report_warning(msg)
3907         return ret
3908
3909     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3910         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3911         write_all = self.params.get('write_all_thumbnails', False)
3912         thumbnails, ret = [], []
3913         if write_all or self.params.get('writethumbnail', False):
3914             thumbnails = info_dict.get('thumbnails') or []
3915         multiple = write_all and len(thumbnails) > 1
3916
3917         if thumb_filename_base is None:
3918             thumb_filename_base = filename
3919         if thumbnails and not thumb_filename_base:
3920             self.write_debug(f'Skipping writing {label} thumbnail')
3921             return ret
3922
3923         for idx, t in list(enumerate(thumbnails))[::-1]:
3924             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3925             thumb_display_id = f'{label} thumbnail {t["id"]}'
3926             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3927             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3928
3929             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3930             if existing_thumb:
3931                 self.to_screen('[info] %s is already present' % (
3932                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3933                 t['filepath'] = existing_thumb
3934                 ret.append((existing_thumb, thumb_filename_final))
3935             else:
3936                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3937                 try:
3938                     uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3939                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3940                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3941                         shutil.copyfileobj(uf, thumbf)
3942                     ret.append((thumb_filename, thumb_filename_final))
3943                     t['filepath'] = thumb_filename
3944                 except network_exceptions as err:
3945                     thumbnails.pop(idx)
3946                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3947             if ret and not write_all:
3948                 break
3949         return ret