yt_dlp/YoutubeDL.py

   1 import collections
   2 import contextlib
   3 import copy
   4 import datetime
   5 import errno
   6 import fileinput
   7 import functools
   8 import http.cookiejar
   9 import io
  10 import itertools
  11 import json
  12 import locale
  13 import operator
  14 import os
  15 import random
  16 import re
  17 import shutil
  18 import string
  19 import subprocess
  20 import sys
  21 import tempfile
  22 import time
  23 import tokenize
  24 import traceback
  25 import unicodedata
  26
  27 from .cache import Cache
  28 from .compat import urllib  # isort: split
  29 from .compat import compat_os_name, compat_shlex_quote
  30 from .cookies import LenientSimpleCookie, load_cookies
  31 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
  32 from .downloader.rtmp import rtmpdump_version
  33 from .extractor import gen_extractor_classes, get_info_extractor
  34 from .extractor.common import UnsupportedURLIE
  35 from .extractor.openload import PhantomJSwrapper
  36 from .minicurses import format_text
  37 from .plugins import directories as plugin_directories
  38 from .postprocessor import _PLUGIN_CLASSES as plugin_pps
  39 from .postprocessor import (
  40     EmbedThumbnailPP,
  41     FFmpegFixupDuplicateMoovPP,
  42     FFmpegFixupDurationPP,
  43     FFmpegFixupM3u8PP,
  44     FFmpegFixupM4aPP,
  45     FFmpegFixupStretchedPP,
  46     FFmpegFixupTimestampPP,
  47     FFmpegMergerPP,
  48     FFmpegPostProcessor,
  49     FFmpegVideoConvertorPP,
  50     MoveFilesAfterDownloadPP,
  51     get_postprocessor,
  52 )
  53 from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
  54 from .update import REPOSITORY, current_git_head, detect_variant
  55 from .utils import (
  56     DEFAULT_OUTTMPL,
  57     IDENTITY,
  58     LINK_TEMPLATES,
  59     MEDIA_EXTENSIONS,
  60     NO_DEFAULT,
  61     NUMBER_RE,
  62     OUTTMPL_TYPES,
  63     POSTPROCESS_WHEN,
  64     STR_FORMAT_RE_TMPL,
  65     STR_FORMAT_TYPES,
  66     ContentTooShortError,
  67     DateRange,
  68     DownloadCancelled,
  69     DownloadError,
  70     EntryNotInPlaylist,
  71     ExistingVideoReached,
  72     ExtractorError,
  73     FormatSorter,
  74     GeoRestrictedError,
  75     HEADRequest,
  76     ISO3166Utils,
  77     LazyList,
  78     MaxDownloadsReached,
  79     Namespace,
  80     PagedList,
  81     PerRequestProxyHandler,
  82     PlaylistEntries,
  83     Popen,
  84     PostProcessingError,
  85     ReExtractInfo,
  86     RejectedVideoReached,
  87     SameFileError,
  88     UnavailableVideoError,
  89     UserNotLive,
  90     YoutubeDLCookieProcessor,
  91     YoutubeDLHandler,
  92     YoutubeDLRedirectHandler,
  93     age_restricted,
  94     args_to_str,
  95     bug_reports_message,
  96     date_from_str,
  97     deprecation_warning,
  98     determine_ext,
  99     determine_protocol,
 100     encode_compat_str,
 101     encodeFilename,
 102     error_to_compat_str,
 103     escapeHTML,
 104     expand_path,
 105     filter_dict,
 106     float_or_none,
 107     format_bytes,
 108     format_decimal_suffix,
 109     format_field,
 110     formatSeconds,
 111     get_compatible_ext,
 112     get_domain,
 113     int_or_none,
 114     iri_to_uri,
 115     is_path_like,
 116     join_nonempty,
 117     locked_file,
 118     make_archive_id,
 119     make_dir,
 120     make_HTTPS_handler,
 121     merge_headers,
 122     network_exceptions,
 123     number_of_digits,
 124     orderedSet,
 125     orderedSet_from_options,
 126     parse_filesize,
 127     preferredencoding,
 128     prepend_extension,
 129     remove_terminal_sequences,
 130     render_table,
 131     replace_extension,
 132     sanitize_filename,
 133     sanitize_path,
 134     sanitize_url,
 135     sanitized_Request,
 136     std_headers,
 137     str_or_none,
 138     strftime_or_none,
 139     subtitles_filename,
 140     supports_terminal_sequences,
 141     system_identifier,
 142     timetuple_from_msec,
 143     to_high_limit_path,
 144     traverse_obj,
 145     try_call,
 146     try_get,
 147     url_basename,
 148     variadic,
 149     version_tuple,
 150     windows_enable_vt_mode,
 151     write_json_file,
 152     write_string,
 153 )
 154 from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__
 155
 156 if compat_os_name == 'nt':
 157     import ctypes
 158
 159
 160 class YoutubeDL:
 161     """YoutubeDL class.
 162
 163     YoutubeDL objects are the ones responsible for downloading the
 164     actual video file and writing it to disk if the user has requested
 165     it, among some other tasks. In most cases there should be one per
 166     program. As, given a video URL, the downloader doesn't know how to
 167     extract all the needed information, task that InfoExtractors do, it
 168     has to pass the URL to one of them.
 169
 170     For this, YoutubeDL objects have a method that allows
 171     InfoExtractors to be registered in a given order. When it is passed
 172     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 173     finds that reports being able to handle it. The InfoExtractor extracts
 174     all the information about the video or videos the URL refers to, and
 175     YoutubeDL processes the extracted information, possibly using a File
 176     Downloader to download the video.
 177
 178     YoutubeDL objects accept a lot of parameters. In order not to saturate
 179     the object constructor with arguments, it receives a dictionary of
 180     options instead. These options are available through the params
 181     attribute for the InfoExtractors to use. The YoutubeDL also
 182     registers itself as the downloader in charge for the InfoExtractors
 183     that are added to it, so this is a "mutual registration".
 184
 185     Available options:
 186
 187     username:          Username for authentication purposes.
 188     password:          Password for authentication purposes.
 189     videopassword:     Password for accessing a video.
 190     ap_mso:            Adobe Pass multiple-system operator identifier.
 191     ap_username:       Multiple-system operator account username.
 192     ap_password:       Multiple-system operator account password.
 193     usenetrc:          Use netrc for authentication instead.
 194     netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
 195     netrc_cmd:         Use a shell command to get credentials
 196     verbose:           Print additional info to stdout.
 197     quiet:             Do not print messages to stdout.
 198     no_warnings:       Do not print out anything for warnings.
 199     forceprint:        A dict with keys WHEN mapped to a list of templates to
 200                        print to stdout. The allowed keys are video or any of the
 201                        items in utils.POSTPROCESS_WHEN.
 202                        For compatibility, a single list is also accepted
 203     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 204                        a list of tuples with (template, filename)
 205     forcejson:         Force printing info_dict as JSON.
 206     dump_single_json:  Force printing the info_dict of the whole playlist
 207                        (or video) as a single JSON line.
 208     force_write_download_archive: Force writing download archive regardless
 209                        of 'skip_download' or 'simulate'.
 210     simulate:          Do not download the video files. If unset (or None),
 211                        simulate only if listsubtitles, listformats or list_thumbnails is used
 212     format:            Video format code. see "FORMAT SELECTION" for more details.
 213                        You can also pass a function. The function takes 'ctx' as
 214                        argument and returns the formats to download.
 215                        See "build_format_selector" for an implementation
 216     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 217     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 218                        extracting metadata even if the video is not actually
 219                        available for download (experimental)
 220     format_sort:       A list of fields by which to sort the video formats.
 221                        See "Sorting Formats" for more details.
 222     format_sort_force: Force the given format_sort. see "Sorting Formats"
 223                        for more details.
 224     prefer_free_formats: Whether to prefer video formats with free containers
 225                        over non-free ones of same quality.
 226     allow_multiple_video_streams:   Allow multiple video streams to be merged
 227                        into a single file
 228     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 229                        into a single file
 230     check_formats:     Whether to test if the formats are downloadable.
 231                        Can be True (check all), False (check none),
 232                        'selected' (check selected formats),
 233                        or None (check only if requested by extractor)
 234     paths:             Dictionary of output paths. The allowed keys are 'home',
 235                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 236     outtmpl:           Dictionary of templates for output names. Allowed keys
 237                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 238                        For compatibility with youtube-dl, a single string can also be used
 239     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 240     restrictfilenames: Do not allow "&" and spaces in file names
 241     trim_file_name:    Limit length of filename (extension excluded)
 242     windowsfilenames:  Force the filenames to be windows compatible
 243     ignoreerrors:      Do not stop on download/postprocessing errors.
 244                        Can be 'only_download' to ignore only download errors.
 245                        Default is 'only_download' for CLI, but False for API
 246     skip_playlist_after_errors: Number of allowed failures until the rest of
 247                        the playlist is skipped
 248     allowed_extractors:  List of regexes to match against extractor names that are allowed
 249     overwrites:        Overwrite all video and metadata files if True,
 250                        overwrite only non-video files if None
 251                        and don't overwrite any file if False
 252                        For compatibility with youtube-dl,
 253                        "nooverwrites" may also be used instead
 254     playlist_items:    Specific indices of playlist to download.
 255     playlistrandom:    Download playlist items in random order.
 256     lazy_playlist:     Process playlist entries as they are received.
 257     matchtitle:        Download only matching titles.
 258     rejecttitle:       Reject downloads for matching titles.
 259     logger:            Log messages to a logging.Logger instance.
 260     logtostderr:       Print everything to stderr instead of stdout.
 261     consoletitle:      Display progress in console window's titlebar.
 262     writedescription:  Write the video description to a .description file
 263     writeinfojson:     Write the video metadata to a .info.json file
 264     clean_infojson:    Remove internal metadata from the infojson
 265     getcomments:       Extract video comments. This will not be written to disk
 266                        unless writeinfojson is also given
 267     writeannotations:  Write the video annotations to a .annotations.xml file
 268     writethumbnail:    Write the thumbnail image to a file
 269     allow_playlist_files: Whether to write playlists' description, infojson etc
 270                        also to disk when using the 'write*' options
 271     write_all_thumbnails:  Write all thumbnail formats to files
 272     writelink:         Write an internet shortcut file, depending on the
 273                        current platform (.url/.webloc/.desktop)
 274     writeurllink:      Write a Windows internet shortcut file (.url)
 275     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 276     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 277     writesubtitles:    Write the video subtitles to a file
 278     writeautomaticsub: Write the automatically generated subtitles to a file
 279     listsubtitles:     Lists all available subtitles for the video
 280     subtitlesformat:   The format code for subtitles
 281     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 282                        The list may contain "all" to refer to all the available
 283                        subtitles. The language can be prefixed with a "-" to
 284                        exclude it from the requested languages, e.g. ['all', '-live_chat']
 285     keepvideo:         Keep the video file after post-processing
 286     daterange:         A utils.DateRange object, download only if the upload_date is in the range.
 287     skip_download:     Skip the actual download of the video file
 288     cachedir:          Location of the cache files in the filesystem.
 289                        False to disable filesystem cache.
 290     noplaylist:        Download single video instead of a playlist if in doubt.
 291     age_limit:         An integer representing the user's age in years.
 292                        Unsuitable videos for the given age are skipped.
 293     min_views:         An integer representing the minimum view count the video
 294                        must have in order to not be skipped.
 295                        Videos without view count information are always
 296                        downloaded. None for no limit.
 297     max_views:         An integer representing the maximum view count.
 298                        Videos that are more popular than that are not
 299                        downloaded.
 300                        Videos without view count information are always
 301                        downloaded. None for no limit.
 302     download_archive:  A set, or the name of a file where all downloads are recorded.
 303                        Videos already present in the file are not downloaded again.
 304     break_on_existing: Stop the download process after attempting to download a
 305                        file that is in the archive.
 306     break_per_url:     Whether break_on_reject and break_on_existing
 307                        should act on each input URL as opposed to for the entire queue
 308     cookiefile:        File name or text stream from where cookies should be read and dumped to
 309     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 310                        name/path from where cookies are loaded, the name of the keyring,
 311                        and the container name, e.g. ('chrome', ) or
 312                        ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
 313     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 314                        support RFC 5746 secure renegotiation
 315     nocheckcertificate:  Do not verify SSL certificates
 316     client_certificate:  Path to client certificate file in PEM format. May include the private key
 317     client_certificate_key:  Path to private key file for client certificate
 318     client_certificate_password:  Password for client certificate private key, if encrypted.
 319                         If not provided and the key is encrypted, yt-dlp will ask interactively
 320     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 321                        (Only supported by some extractors)
 322     enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
 323     http_headers:      A dictionary of custom headers to be used for all requests
 324     proxy:             URL of the proxy server to use
 325     geo_verification_proxy:  URL of the proxy to use for IP address verification
 326                        on geo-restricted sites.
 327     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 328     bidi_workaround:   Work around buggy terminals without bidirectional text
 329                        support, using fribidi
 330     debug_printtraffic:Print out sent and received HTTP traffic
 331     default_search:    Prepend this string if an input url is not valid.
 332                        'auto' for elaborate guessing
 333     encoding:          Use this encoding instead of the system-specified.
 334     extract_flat:      Whether to resolve and process url_results further
 335                        * False:     Always process. Default for API
 336                        * True:      Never process
 337                        * 'in_playlist': Do not process inside playlist/multi_video
 338                        * 'discard': Always process, but don't return the result
 339                                     from inside playlist/multi_video
 340                        * 'discard_in_playlist': Same as "discard", but only for
 341                                     playlists (not multi_video). Default for CLI
 342     wait_for_video:    If given, wait for scheduled streams to become available.
 343                        The value should be a tuple containing the range
 344                        (min_secs, max_secs) to wait between retries
 345     postprocessors:    A list of dictionaries, each with an entry
 346                        * key:  The name of the postprocessor. See
 347                                yt_dlp/postprocessor/__init__.py for a list.
 348                        * when: When to run the postprocessor. Allowed values are
 349                                the entries of utils.POSTPROCESS_WHEN
 350                                Assumed to be 'post_process' if not given
 351     progress_hooks:    A list of functions that get called on download
 352                        progress, with a dictionary with the entries
 353                        * status: One of "downloading", "error", or "finished".
 354                                  Check this first and ignore unknown values.
 355                        * info_dict: The extracted info_dict
 356
 357                        If status is one of "downloading", or "finished", the
 358                        following properties may also be present:
 359                        * filename: The final filename (always present)
 360                        * tmpfilename: The filename we're currently writing to
 361                        * downloaded_bytes: Bytes on disk
 362                        * total_bytes: Size of the whole file, None if unknown
 363                        * total_bytes_estimate: Guess of the eventual file size,
 364                                                None if unavailable.
 365                        * elapsed: The number of seconds since download started.
 366                        * eta: The estimated time in seconds, None if unknown
 367                        * speed: The download speed in bytes/second, None if
 368                                 unknown
 369                        * fragment_index: The counter of the currently
 370                                          downloaded video fragment.
 371                        * fragment_count: The number of fragments (= individual
 372                                          files that will be merged)
 373
 374                        Progress hooks are guaranteed to be called at least once
 375                        (with status "finished") if the download is successful.
 376     postprocessor_hooks:  A list of functions that get called on postprocessing
 377                        progress, with a dictionary with the entries
 378                        * status: One of "started", "processing", or "finished".
 379                                  Check this first and ignore unknown values.
 380                        * postprocessor: Name of the postprocessor
 381                        * info_dict: The extracted info_dict
 382
 383                        Postprocessor hooks are guaranteed to be called at least twice
 384                        (with status "started" and "finished") if the processing is successful.
 385     merge_output_format: "/" separated list of extensions to use when merging formats.
 386     final_ext:         Expected final extension; used to detect when the file was
 387                        already downloaded and converted
 388     fixup:             Automatically correct known faults of the file.
 389                        One of:
 390                        - "never": do nothing
 391                        - "warn": only emit a warning
 392                        - "detect_or_warn": check whether we can do anything
 393                                            about it, warn otherwise (default)
 394     source_address:    Client-side IP address to bind to.
 395     sleep_interval_requests: Number of seconds to sleep between requests
 396                        during extraction
 397     sleep_interval:    Number of seconds to sleep before each download when
 398                        used alone or a lower bound of a range for randomized
 399                        sleep before each download (minimum possible number
 400                        of seconds to sleep) when used along with
 401                        max_sleep_interval.
 402     max_sleep_interval:Upper bound of a range for randomized sleep before each
 403                        download (maximum possible number of seconds to sleep).
 404                        Must only be used along with sleep_interval.
 405                        Actual sleep time will be a random float from range
 406                        [sleep_interval; max_sleep_interval].
 407     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 408     listformats:       Print an overview of available video formats and exit.
 409     list_thumbnails:   Print a table of all thumbnails and exit.
 410     match_filter:      A function that gets called for every video with the signature
 411                        (info_dict, *, incomplete: bool) -> Optional[str]
 412                        For backward compatibility with youtube-dl, the signature
 413                        (info_dict) -> Optional[str] is also allowed.
 414                        - If it returns a message, the video is ignored.
 415                        - If it returns None, the video is downloaded.
 416                        - If it returns utils.NO_DEFAULT, the user is interactively
 417                          asked whether to download the video.
 418                        - Raise utils.DownloadCancelled(msg) to abort remaining
 419                          downloads when a video is rejected.
 420                        match_filter_func in utils.py is one example for this.
 421     color:             A Dictionary with output stream names as keys
 422                        and their respective color policy as values.
 423                        Can also just be a single color policy,
 424                        in which case it applies to all outputs.
 425                        Valid stream names are 'stdout' and 'stderr'.
 426                        Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
 427     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 428                        HTTP header
 429     geo_bypass_country:
 430                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 431                        explicit geographic restriction bypassing via faking
 432                        X-Forwarded-For HTTP header
 433     geo_bypass_ip_block:
 434                        IP range in CIDR notation that will be used similarly to
 435                        geo_bypass_country
 436     external_downloader: A dictionary of protocol keys and the executable of the
 437                        external downloader to use for it. The allowed protocols
 438                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 439                        Set the value to 'native' to use the native downloader
 440     compat_opts:       Compatibility options. See "Differences in default behavior".
 441                        The following options do not work when used through the API:
 442                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 443                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 444                        Refer to __init__.py for their implementation
 445     progress_template: Dictionary of templates for progress outputs.
 446                        Allowed keys are 'download', 'postprocess',
 447                        'download-title' (console title) and 'postprocess-title'.
 448                        The template is mapped on a dictionary with keys 'progress' and 'info'
 449     retry_sleep_functions: Dictionary of functions that take the number of attempts
 450                        as argument and return the time to sleep in seconds.
 451                        Allowed keys are 'http', 'fragment', 'file_access'
 452     download_ranges:   A callback function that gets called for every video with
 453                        the signature (info_dict, ydl) -> Iterable[Section].
 454                        Only the returned sections will be downloaded.
 455                        Each Section is a dict with the following keys:
 456                        * start_time: Start time of the section in seconds
 457                        * end_time: End time of the section in seconds
 458                        * title: Section title (Optional)
 459                        * index: Section number (Optional)
 460     force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
 461     noprogress:        Do not print the progress bar
 462     live_from_start:   Whether to download livestream videos from the start
 463
 464     The following parameters are not used by YoutubeDL itself, they are used by
 465     the downloader (see yt_dlp/downloader/common.py):
 466     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 467     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 468     continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 469     external_downloader_args, concurrent_fragment_downloads.
 470
 471     The following options are used by the post processors:
 472     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 473                        to the binary or its containing directory.
 474     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 475                        and a list of additional command-line arguments for the
 476                        postprocessor/executable. The dict can also have "PP+EXE" keys
 477                        which are used when the given exe is used by the given PP.
 478                        Use 'default' as the name for arguments to be passed to all PPs
 479                        For compatibility with youtube-dl, a single list of args
 480                        can also be used
 481
 482     The following options are used by the extractors:
 483     extractor_retries: Number of times to retry for known errors (default: 3)
 484     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 485     hls_split_discontinuity: Split HLS playlists to different formats at
 486                        discontinuities such as ad breaks (default: False)
 487     extractor_args:    A dictionary of arguments to be passed to the extractors.
 488                        See "EXTRACTOR ARGUMENTS" for details.
 489                        E.g. {'youtube': {'skip': ['dash', 'hls']}}
 490     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 491
 492     The following options are deprecated and may be removed in the future:
 493
 494     break_on_reject:   Stop the download process when encountering a video that
 495                        has been filtered out.
 496                        - `raise DownloadCancelled(msg)` in match_filter instead
 497     force_generic_extractor: Force downloader to use the generic extractor
 498                        - Use allowed_extractors = ['generic', 'default']
 499     playliststart:     - Use playlist_items
 500                        Playlist item to start at.
 501     playlistend:       - Use playlist_items
 502                        Playlist item to end at.
 503     playlistreverse:   - Use playlist_items
 504                        Download playlist items in reverse order.
 505     forceurl:          - Use forceprint
 506                        Force printing final URL.
 507     forcetitle:        - Use forceprint
 508                        Force printing title.
 509     forceid:           - Use forceprint
 510                        Force printing ID.
 511     forcethumbnail:    - Use forceprint
 512                        Force printing thumbnail URL.
 513     forcedescription:  - Use forceprint
 514                        Force printing description.
 515     forcefilename:     - Use forceprint
 516                        Force printing final filename.
 517     forceduration:     - Use forceprint
 518                        Force printing duration.
 519     allsubtitles:      - Use subtitleslangs = ['all']
 520                        Downloads all the subtitles of the video
 521                        (requires writesubtitles or writeautomaticsub)
 522     include_ads:       - Doesn't work
 523                        Download ads as well
 524     call_home:         - Not implemented
 525                        Boolean, true iff we are allowed to contact the
 526                        yt-dlp servers for debugging.
 527     post_hooks:        - Register a custom postprocessor
 528                        A list of functions that get called as the final step
 529                        for each video file, after all postprocessors have been
 530                        called. The filename will be passed as the only argument.
 531     hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
 532                        Use the native HLS downloader instead of ffmpeg/avconv
 533                        if True, otherwise use ffmpeg/avconv if False, otherwise
 534                        use downloader suggested by extractor if None.
 535     prefer_ffmpeg:     - avconv support is deprecated
 536                        If False, use avconv instead of ffmpeg if both are available,
 537                        otherwise prefer ffmpeg.
 538     youtube_include_dash_manifest: - Use extractor_args
 539                        If True (default), DASH manifests and related
 540                        data will be downloaded and processed by extractor.
 541                        You can reduce network I/O by disabling it if you don't
 542                        care about DASH. (only for youtube)
 543     youtube_include_hls_manifest: - Use extractor_args
 544                        If True (default), HLS manifests and related
 545                        data will be downloaded and processed by extractor.
 546                        You can reduce network I/O by disabling it if you don't
 547                        care about HLS. (only for youtube)
 548     no_color:          Same as `color='no_color'`
 549     """
 550
 551     _NUMERIC_FIELDS = {
 552         'width', 'height', 'asr', 'audio_channels', 'fps',
 553         'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
 554         'timestamp', 'release_timestamp',
 555         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 556         'average_rating', 'comment_count', 'age_limit',
 557         'start_time', 'end_time',
 558         'chapter_number', 'season_number', 'episode_number',
 559         'track_number', 'disc_number', 'release_year',
 560     }
 561
 562     _format_fields = {
 563         # NB: Keep in sync with the docstring of extractor/common.py
 564         'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
 565         'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
 566         'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
 567         'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
 568         'preference', 'language', 'language_preference', 'quality', 'source_preference',
 569         'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
 570         'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
 571     }
    # File-extension sets keyed by media kind, built from MEDIA_EXTENSIONS
    # (imported from .utils). '3gp' is added on top of the common video extensions.
    # NOTE(review): presumably consulted by the format selector (per the name);
    # the consuming code is outside this part of the file.
    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }
 577
 578     def __init__(self, params=None, auto_init=True):
 579         """Create a FileDownloader object with the given options.
 580         @param auto_init    Whether to load the default extractors and print header (if verbose).
 581                             Set to 'no_verbose_header' to not print the header
 582         """
 583         if params is None:
 584             params = {}
 585         self.params = params
 586         self._ies = {}
 587         self._ies_instances = {}
 588         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 589         self._printed_messages = set()
 590         self._first_webpage_request = True
 591         self._post_hooks = []
 592         self._progress_hooks = []
 593         self._postprocessor_hooks = []
 594         self._download_retcode = 0
 595         self._num_downloads = 0
 596         self._num_videos = 0
 597         self._playlist_level = 0
 598         self._playlist_urls = set()
 599         self.cache = Cache(self)
 600
 601         stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
 602         self._out_files = Namespace(
 603             out=stdout,
 604             error=sys.stderr,
 605             screen=sys.stderr if self.params.get('quiet') else stdout,
 606             console=None if compat_os_name == 'nt' else next(
 607                 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
 608         )
 609
 610         try:
 611             windows_enable_vt_mode()
 612         except Exception as e:
 613             self.write_debug(f'Failed to enable VT mode: {e}')
 614
 615         if self.params.get('no_color'):
 616             if self.params.get('color') is not None:
 617                 self.report_warning('Overwriting params from "color" with "no_color"')
 618             self.params['color'] = 'no_color'
 619
 620         term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'
 621
 622         def process_color_policy(stream):
 623             stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
 624             policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
 625             if policy in ('auto', None):
 626                 return term_allow_color and supports_terminal_sequences(stream)
 627             assert policy in ('always', 'never', 'no_color')
 628             return {'always': True, 'never': False}.get(policy, policy)
 629
 630         self._allow_colors = Namespace(**{
 631             name: process_color_policy(stream)
 632             for name, stream in self._out_files.items_ if name != 'console'
 633         })
 634
 635         # The code is left like this to be reused for future deprecations
 636         MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
 637         current_version = sys.version_info[:2]
 638         if current_version < MIN_RECOMMENDED:
 639             msg = ('Support for Python version %d.%d has been deprecated. '
 640                    'See  https://github.com/yt-dlp/yt-dlp/issues/3764  for more details.'
 641                    '\n                    You will no longer receive updates on this version')
 642             if current_version < MIN_SUPPORTED:
 643                 msg = 'Python version %d.%d is no longer supported'
 644             self.deprecated_feature(
 645                 f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))
 646
 647         if self.params.get('allow_unplayable_formats'):
 648             self.report_warning(
 649                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 650                 'This is a developer option intended for debugging. \n'
 651                 '         If you experience any issues while using this option, '
 652                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 653
 654         if self.params.get('bidi_workaround', False):
 655             try:
 656                 import pty
 657                 master, slave = pty.openpty()
 658                 width = shutil.get_terminal_size().columns
 659                 width_args = [] if width is None else ['-w', str(width)]
 660                 sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
 661                 try:
 662                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 663                 except OSError:
 664                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 665                 self._output_channel = os.fdopen(master, 'rb')
 666             except OSError as ose:
 667                 if ose.errno == errno.ENOENT:
 668                     self.report_warning(
 669                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 670                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 671                 else:
 672                     raise
 673
 674         self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
 675         if auto_init and auto_init != 'no_verbose_header':
 676             self.print_debug_header()
 677
 678         self.__header_cookies = []
 679         self._load_cookies(traverse_obj(self.params.get('http_headers'), 'cookie', casesense=False))  # compat
 680
 681         def check_deprecated(param, option, suggestion):
 682             if self.params.get(param) is not None:
 683                 self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
 684                 return True
 685             return False
 686
 687         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 688             if self.params.get('geo_verification_proxy') is None:
 689                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 690
 691         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 692         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 693         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 694
 695         for msg in self.params.get('_warnings', []):
 696             self.report_warning(msg)
 697         for msg in self.params.get('_deprecation_warnings', []):
 698             self.deprecated_feature(msg)
 699
 700         if 'list-formats' in self.params['compat_opts']:
 701             self.params['listformats_table'] = False
 702
 703         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 704             # nooverwrites was unnecessarily changed to overwrites
 705             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 706             # This ensures compatibility with both keys
 707             self.params['overwrites'] = not self.params['nooverwrites']
 708         elif self.params.get('overwrites') is None:
 709             self.params.pop('overwrites', None)
 710         else:
 711             self.params['nooverwrites'] = not self.params['overwrites']
 712
 713         if self.params.get('simulate') is None and any((
 714             self.params.get('list_thumbnails'),
 715             self.params.get('listformats'),
 716             self.params.get('listsubtitles'),
 717         )):
 718             self.params['simulate'] = 'list_only'
 719
 720         self.params.setdefault('forceprint', {})
 721         self.params.setdefault('print_to_file', {})
 722
 723         # Compatibility with older syntax
 724         if not isinstance(params['forceprint'], dict):
 725             self.params['forceprint'] = {'video': params['forceprint']}
 726
 727         if auto_init:
 728             self.add_default_info_extractors()
 729
 730         if (sys.platform != 'win32'
 731                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 732                 and not self.params.get('restrictfilenames', False)):
 733             # Unicode filesystem API will throw errors (#1474, #13027)
 734             self.report_warning(
 735                 'Assuming --restrict-filenames since file system encoding '
 736                 'cannot encode all characters. '
 737                 'Set the LC_ALL environment variable to fix this.')
 738             self.params['restrictfilenames'] = True
 739
 740         self._parse_outtmpl()
 741
 742         # Creating format selector here allows us to catch syntax errors before the extraction
 743         self.format_selector = (
 744             self.params.get('format') if self.params.get('format') in (None, '-')
 745             else self.params['format'] if callable(self.params['format'])
 746             else self.build_format_selector(self.params['format']))
 747
 748         # Set http_headers defaults according to std_headers
 749         self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
 750
 751         hooks = {
 752             'post_hooks': self.add_post_hook,
 753             'progress_hooks': self.add_progress_hook,
 754             'postprocessor_hooks': self.add_postprocessor_hook,
 755         }
 756         for opt, fn in hooks.items():
 757             for ph in self.params.get(opt, []):
 758                 fn(ph)
 759
 760         for pp_def_raw in self.params.get('postprocessors', []):
 761             pp_def = dict(pp_def_raw)
 762             when = pp_def.pop('when', 'post_process')
 763             self.add_post_processor(
 764                 get_postprocessor(pp_def.pop('key'))(self, **pp_def),
 765                 when=when)
 766
 767         self._setup_opener()
 768
 769         def preload_download_archive(fn):
 770             """Preload the archive, if any is specified"""
 771             archive = set()
 772             if fn is None:
 773                 return archive
 774             elif not is_path_like(fn):
 775                 return fn
 776
 777             self.write_debug(f'Loading archive file {fn!r}')
 778             try:
 779                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 780                     for line in archive_file:
 781                         archive.add(line.strip())
 782             except OSError as ioe:
 783                 if ioe.errno != errno.ENOENT:
 784                     raise
 785             return archive
 786
 787         self.archive = preload_download_archive(self.params.get('download_archive'))
 788
 789     def warn_if_short_id(self, argv):
 790         # short YouTube ID starting with dash?
 791         idxs = [
 792             i for i, a in enumerate(argv)
 793             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 794         if idxs:
 795             correct_argv = (
 796                 ['yt-dlp']
 797                 + [a for i, a in enumerate(argv) if i not in idxs]
 798                 + ['--'] + [argv[i] for i in idxs]
 799             )
 800             self.report_warning(
 801                 'Long argument string detected. '
 802                 'Use -- to separate parameters and URLs, like this:\n%s' %
 803                 args_to_str(correct_argv))
 804
 805     def add_info_extractor(self, ie):
 806         """Add an InfoExtractor object to the end of the list."""
 807         ie_key = ie.ie_key()
 808         self._ies[ie_key] = ie
 809         if not isinstance(ie, type):
 810             self._ies_instances[ie_key] = ie
 811             ie.set_downloader(self)
 812
 813     def get_info_extractor(self, ie_key):
 814         """
 815         Get an instance of an IE with name ie_key, it will try to get one from
 816         the _ies list, if there's no instance it will create a new one and add
 817         it to the extractor list.
 818         """
 819         ie = self._ies_instances.get(ie_key)
 820         if ie is None:
 821             ie = get_info_extractor(ie_key)()
 822             self.add_info_extractor(ie)
 823         return ie
 824
 825     def add_default_info_extractors(self):
 826         """
 827         Add the InfoExtractors returned by gen_extractors to the end of the list
 828         """
 829         all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
 830         all_ies['end'] = UnsupportedURLIE()
 831         try:
 832             ie_names = orderedSet_from_options(
 833                 self.params.get('allowed_extractors', ['default']), {
 834                     'all': list(all_ies),
 835                     'default': [name for name, ie in all_ies.items() if ie._ENABLED],
 836                 }, use_regex=True)
 837         except re.error as e:
 838             raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
 839         for name in ie_names:
 840             self.add_info_extractor(all_ies[name])
 841         self.write_debug(f'Loaded {len(ie_names)} extractors')
 842
 843     def add_post_processor(self, pp, when='post_process'):
 844         """Add a PostProcessor object to the end of the chain."""
 845         assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
 846         self._pps[when].append(pp)
 847         pp.set_downloader(self)
 848
 849     def add_post_hook(self, ph):
 850         """Add the post hook"""
 851         self._post_hooks.append(ph)
 852
 853     def add_progress_hook(self, ph):
 854         """Add the download progress hook"""
 855         self._progress_hooks.append(ph)
 856
 857     def add_postprocessor_hook(self, ph):
 858         """Add the postprocessing progress hook"""
 859         self._postprocessor_hooks.append(ph)
 860         for pps in self._pps.values():
 861             for pp in pps:
 862                 pp.add_progress_hook(ph)
 863
 864     def _bidi_workaround(self, message):
 865         if not hasattr(self, '_output_channel'):
 866             return message
 867
 868         assert hasattr(self, '_output_process')
 869         assert isinstance(message, str)
 870         line_count = message.count('\n') + 1
 871         self._output_process.stdin.write((message + '\n').encode())
 872         self._output_process.stdin.flush()
 873         res = ''.join(self._output_channel.readline().decode()
 874                       for _ in range(line_count))
 875         return res[:-len('\n')]
 876
 877     def _write_string(self, message, out=None, only_once=False):
 878         if only_once:
 879             if message in self._printed_messages:
 880                 return
 881             self._printed_messages.add(message)
 882         write_string(message, out=out, encoding=self.params.get('encoding'))
 883
 884     def to_stdout(self, message, skip_eol=False, quiet=None):
 885         """Print message to stdout"""
 886         if quiet is not None:
 887             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
 888                                      'Use "YoutubeDL.to_screen" instead')
 889         if skip_eol is not False:
 890             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
 891                                      'Use "YoutubeDL.to_screen" instead')
 892         self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
 893
 894     def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
 895         """Print message to screen if not in quiet mode"""
 896         if self.params.get('logger'):
 897             self.params['logger'].debug(message)
 898             return
 899         if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
 900             return
 901         self._write_string(
 902             '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 903             self._out_files.screen, only_once=only_once)
 904
 905     def to_stderr(self, message, only_once=False):
 906         """Print message to stderr"""
 907         assert isinstance(message, str)
 908         if self.params.get('logger'):
 909             self.params['logger'].error(message)
 910         else:
 911             self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
 912
 913     def _send_console_code(self, code):
 914         if compat_os_name == 'nt' or not self._out_files.console:
 915             return
 916         self._write_string(code, self._out_files.console)
 917
 918     def to_console_title(self, message):
 919         if not self.params.get('consoletitle', False):
 920             return
 921         message = remove_terminal_sequences(message)
 922         if compat_os_name == 'nt':
 923             if ctypes.windll.kernel32.GetConsoleWindow():
 924                 # c_wchar_p() might not be necessary if `message` is
 925                 # already of type unicode()
 926                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 927         else:
 928             self._send_console_code(f'\033]0;{message}\007')
 929
 930     def save_console_title(self):
 931         if not self.params.get('consoletitle') or self.params.get('simulate'):
 932             return
 933         self._send_console_code('\033[22;0t')  # Save the title on stack
 934
 935     def restore_console_title(self):
 936         if not self.params.get('consoletitle') or self.params.get('simulate'):
 937             return
 938         self._send_console_code('\033[23;0t')  # Restore the title from stack
 939
    def __enter__(self):
        """Enter the context manager: save the console title and return this object"""
        self.save_console_title()
        return self
 943
 944     def __exit__(self, *args):
 945         self.restore_console_title()
 946
 947         if self.params.get('cookiefile') is not None:
 948             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 949
 950     def trouble(self, message=None, tb=None, is_error=True):
 951         """Determine action to take when a download problem appears.
 952
 953         Depending on if the downloader has been configured to ignore
 954         download errors or not, this method may throw an exception or
 955         not when errors are found, after printing the message.
 956
 957         @param tb          If given, is additional traceback information
 958         @param is_error    Whether to raise error according to ignorerrors
 959         """
 960         if message is not None:
 961             self.to_stderr(message)
 962         if self.params.get('verbose'):
 963             if tb is None:
 964                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 965                     tb = ''
 966                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 967                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 968                     tb += encode_compat_str(traceback.format_exc())
 969                 else:
 970                     tb_data = traceback.format_list(traceback.extract_stack())
 971                     tb = ''.join(tb_data)
 972             if tb:
 973                 self.to_stderr(tb)
 974         if not is_error:
 975             return
 976         if not self.params.get('ignoreerrors'):
 977             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 978                 exc_info = sys.exc_info()[1].exc_info
 979             else:
 980                 exc_info = sys.exc_info()
 981             raise DownloadError(message, exc_info)
 982         self._download_retcode = 1
 983
    # Named text styles. Members are passed as the style argument of the
    # _format_out/_format_screen/_format_err helpers (e.g. Styles.WARNING in
    # report_warning), which hand them on to format_text().
    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        BAD_FORMAT='light red',
        WARNING='yellow',
        SUPPRESS='light black',
    )
 995
 996     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 997         text = str(text)
 998         if test_encoding:
 999             original_text = text
1000             # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
1001             encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
1002             text = text.encode(encoding, 'ignore').decode(encoding)
1003             if fallback is not None and text != original_text:
1004                 text = fallback
1005         return format_text(text, f) if allow_colors is True else text if fallback is None else fallback
1006
1007     def _format_out(self, *args, **kwargs):
1008         return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
1009
1010     def _format_screen(self, *args, **kwargs):
1011         return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
1012
1013     def _format_err(self, *args, **kwargs):
1014         return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
1015
1016     def report_warning(self, message, only_once=False):
1017         '''
1018         Print the message to stderr, it will be prefixed with 'WARNING:'
1019         If stderr is a tty file the 'WARNING:' will be colored
1020         '''
1021         if self.params.get('logger') is not None:
1022             self.params['logger'].warning(message)
1023         else:
1024             if self.params.get('no_warnings'):
1025                 return
1026             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
1027
1028     def deprecation_warning(self, message, *, stacklevel=0):
1029         deprecation_warning(
1030             message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)
1031
1032     def deprecated_feature(self, message):
1033         if self.params.get('logger') is not None:
1034             self.params['logger'].warning(f'Deprecated Feature: {message}')
1035         self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
1036
1037     def report_error(self, message, *args, **kwargs):
1038         '''
1039         Do the same as trouble, but prefixes the message with 'ERROR:', colored
1040         in red if stderr is a tty file.
1041         '''
1042         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
1043
1044     def write_debug(self, message, only_once=False):
1045         '''Log debug message or Print message to stderr'''
1046         if not self.params.get('verbose', False):
1047             return
1048         message = f'[debug] {message}'
1049         if self.params.get('logger'):
1050             self.params['logger'].debug(message)
1051         else:
1052             self.to_stderr(message, only_once)
1053
1054     def report_file_already_downloaded(self, file_name):
1055         """Report file has already been fully downloaded."""
1056         try:
1057             self.to_screen('[download] %s has already been downloaded' % file_name)
1058         except UnicodeEncodeError:
1059             self.to_screen('[download] The file has already been downloaded')
1060
1061     def report_file_delete(self, file_name):
1062         """Report that existing file will be deleted."""
1063         try:
1064             self.to_screen('Deleting existing file %s' % file_name)
1065         except UnicodeEncodeError:
1066             self.to_screen('Deleting existing file')
1067
1068     def raise_no_formats(self, info, forced=False, *, msg=None):
1069         has_drm = info.get('_has_drm')
1070         ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
1071         msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
1072         if forced or not ignored:
1073             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
1074                                  expected=has_drm or ignored or expected)
1075         else:
1076             self.report_warning(msg)
1077
1078     def parse_outtmpl(self):
1079         self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
1080         self._parse_outtmpl()
1081         return self.params['outtmpl']
1082
1083     def _parse_outtmpl(self):
1084         sanitize = IDENTITY
1085         if self.params.get('restrictfilenames'):  # Remove spaces in the default template
1086             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
1087
1088         outtmpl = self.params.setdefault('outtmpl', {})
1089         if not isinstance(outtmpl, dict):
1090             self.params['outtmpl'] = outtmpl = {'default': outtmpl}
1091         outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})
1092
1093     def get_output_path(self, dir_type='', filename=None):
1094         paths = self.params.get('paths', {})
1095         assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
1096         path = os.path.join(
1097             expand_path(paths.get('home', '').strip()),
1098             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1099             filename or '')
1100         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1101
1102     @staticmethod
1103     def _outtmpl_expandpath(outtmpl):
1104         # expand_path translates '%%' into '%' and '$$' into '$'
1105         # correspondingly that is not what we want since we need to keep
1106         # '%%' intact for template dict substitution step. Working around
1107         # with boundary-alike separator hack.
1108         sep = ''.join(random.choices(string.ascii_letters, k=32))
1109         outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
1110
1111         # outtmpl should be expand_path'ed before template dict substitution
1112         # because meta fields may contain env variables we don't want to
1113         # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
1114         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1115         return expand_path(outtmpl).replace(sep, '')
1116
1117     @staticmethod
1118     def escape_outtmpl(outtmpl):
1119         ''' Escape any remaining strings like %s, %abc% etc. '''
1120         return re.sub(
1121             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1122             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1123             outtmpl)
1124
1125     @classmethod
1126     def validate_outtmpl(cls, outtmpl):
1127         ''' @return None or Exception object '''
1128         outtmpl = re.sub(
1129             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
1130             lambda mobj: f'{mobj.group(0)[:-1]}s',
1131             cls._outtmpl_expandpath(outtmpl))
1132         try:
1133             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1134             return None
1135         except ValueError as err:
1136             return err
1137
1138     @staticmethod
1139     def _copy_infodict(info_dict):
1140         info_dict = dict(info_dict)
1141         info_dict.pop('__postprocessors', None)
1142         info_dict.pop('__pending_error', None)
1143         return info_dict
1144
1145     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1146         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1147         @param sanitize    Whether to sanitize the output as a filename.
1148                            For backward compatibility, a function can also be passed
1149         """
1150
1151         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1152
1153         info_dict = self._copy_infodict(info_dict)
1154         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1155             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1156             if info_dict.get('duration', None) is not None
1157             else None)
1158         info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
1159         info_dict['video_autonumber'] = self._num_videos
1160         if info_dict.get('resolution') is None:
1161             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1162
1163         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1164         # of %(field)s to %(field)0Nd for backward compatibility
1165         field_size_compat_map = {
1166             'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
1167             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1168             'autonumber': self.params.get('autonumber_size') or 5,
1169         }
1170
1171         TMPL_DICT = {}
1172         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
1173         MATH_FUNCTIONS = {
1174             '+': float.__add__,
1175             '-': float.__sub__,
1176         }
1177         # Field is of the form key1.key2...
1178         # where keys (except first) can be string, int, slice or "{field, ...}"
1179         FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
1180         FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
1181             'inner': FIELD_INNER_RE,
1182             'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
1183         }
1184         MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
1185         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1186         INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
1187             (?P<negate>-)?
1188             (?P<fields>{FIELD_RE})
1189             (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
1190             (?:>(?P<strf_format>.+?))?
1191             (?P<remaining>
1192                 (?P<alternate>(?<!\\),[^|&)]+)?
1193                 (?:&(?P<replacement>.*?))?
1194                 (?:\|(?P<default>.*?))?
1195             )$''')
1196
1197         def _traverse_infodict(fields):
1198             fields = [f for x in re.split(r'\.({.+?})\.?', fields)
1199                       for f in ([x] if x.startswith('{') else x.split('.'))]
1200             for i in (0, -1):
1201                 if fields and not fields[i]:
1202                     fields.pop(i)
1203
1204             for i, f in enumerate(fields):
1205                 if not f.startswith('{'):
1206                     continue
1207                 assert f.endswith('}'), f'No closing brace for {f} in {fields}'
1208                 fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
1209
1210             return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
1211
1212         def get_value(mdict):
1213             # Object traversal
1214             value = _traverse_infodict(mdict['fields'])
1215             # Negative
1216             if mdict['negate']:
1217                 value = float_or_none(value)
1218                 if value is not None:
1219                     value *= -1
1220             # Do maths
1221             offset_key = mdict['maths']
1222             if offset_key:
1223                 value = float_or_none(value)
1224                 operator = None
1225                 while offset_key:
1226                     item = re.match(
1227                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1228                         offset_key).group(0)
1229                     offset_key = offset_key[len(item):]
1230                     if operator is None:
1231                         operator = MATH_FUNCTIONS[item]
1232                         continue
1233                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1234                     offset = float_or_none(item)
1235                     if offset is None:
1236                         offset = float_or_none(_traverse_infodict(item))
1237                     try:
1238                         value = operator(value, multiplier * offset)
1239                     except (TypeError, ZeroDivisionError):
1240                         return None
1241                     operator = None
1242             # Datetime formatting
1243             if mdict['strf_format']:
1244                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1245
1246             # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
1247             if sanitize and value == '':
1248                 value = None
1249             return value
1250
1251         na = self.params.get('outtmpl_na_placeholder', 'NA')
1252
1253         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1254             return sanitize_filename(str(value), restricted=restricted, is_id=(
1255                 bool(re.search(r'(^|[_.])id(\.|$)', key))
1256                 if 'filename-sanitization' in self.params['compat_opts']
1257                 else NO_DEFAULT))
1258
1259         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1260         sanitize = bool(sanitize)
1261
1262         def _dumpjson_default(obj):
1263             if isinstance(obj, (set, LazyList)):
1264                 return list(obj)
1265             return repr(obj)
1266
1267         class _ReplacementFormatter(string.Formatter):
1268             def get_field(self, field_name, args, kwargs):
1269                 if field_name.isdigit():
1270                     return args[0], -1
1271                 raise ValueError('Unsupported field')
1272
1273         replacement_formatter = _ReplacementFormatter()
1274
1275         def create_key(outer_mobj):
1276             if not outer_mobj.group('has_key'):
1277                 return outer_mobj.group(0)
1278             key = outer_mobj.group('key')
1279             mobj = re.match(INTERNAL_FORMAT_RE, key)
1280             value, replacement, default, last_field = None, None, na, ''
1281             while mobj:
1282                 mobj = mobj.groupdict()
1283                 default = mobj['default'] if mobj['default'] is not None else default
1284                 value = get_value(mobj)
1285                 last_field, replacement = mobj['fields'], mobj['replacement']
1286                 if value is None and mobj['alternate']:
1287                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
1288                 else:
1289                     break
1290
1291             fmt = outer_mobj.group('format')
1292             if fmt == 's' and value is not None and last_field in field_size_compat_map.keys():
1293                 fmt = f'0{field_size_compat_map[last_field]:d}d'
1294
1295             if None not in (value, replacement):
1296                 try:
1297                     value = replacement_formatter.format(replacement, value)
1298                 except ValueError:
1299                     value, default = None, na
1300
1301             flags = outer_mobj.group('conversion') or ''
1302             str_fmt = f'{fmt[:-1]}s'
1303             if value is None:
1304                 value, fmt = default, 's'
1305             elif fmt[-1] == 'l':  # list
1306                 delim = '\n' if '#' in flags else ', '
1307                 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1308             elif fmt[-1] == 'j':  # json
1309                 value, fmt = json.dumps(
1310                     value, default=_dumpjson_default,
1311                     indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
1312             elif fmt[-1] == 'h':  # html
1313                 value, fmt = escapeHTML(str(value)), str_fmt
1314             elif fmt[-1] == 'q':  # quoted
1315                 value = map(str, variadic(value) if '#' in flags else [value])
1316                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1317             elif fmt[-1] == 'B':  # bytes
1318                 value = f'%{str_fmt}'.encode() % str(value).encode()
1319                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1320             elif fmt[-1] == 'U':  # unicode normalized
1321                 value, fmt = unicodedata.normalize(
1322                     # "+" = compatibility equivalence, "#" = NFD
1323                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1324                     value), str_fmt
1325             elif fmt[-1] == 'D':  # decimal suffix
1326                 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1327                 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1328                                               factor=1024 if '#' in flags else 1000)
1329             elif fmt[-1] == 'S':  # filename sanitization
1330                 value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
1331             elif fmt[-1] == 'c':
1332                 if value:
1333                     value = str(value)[0]
1334                 else:
1335                     fmt = str_fmt
1336             elif fmt[-1] not in 'rsa':  # numeric
1337                 value = float_or_none(value)
1338                 if value is None:
1339                     value, fmt = default, 's'
1340
1341             if sanitize:
1342                 # If value is an object, sanitize might convert it to a string
1343                 # So we convert it to repr first
1344                 if fmt[-1] == 'r':
1345                     value, fmt = repr(value), str_fmt
1346                 elif fmt[-1] == 'a':
1347                     value, fmt = ascii(value), str_fmt
1348                 if fmt[-1] in 'csra':
1349                     value = sanitizer(last_field, value)
1350
1351             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1352             TMPL_DICT[key] = value
1353             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1354
1355         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1356
1357     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1358         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1359         return self.escape_outtmpl(outtmpl) % info_dict
1360
1361     def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1362         assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1363         if outtmpl is None:
1364             outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1365         try:
1366             outtmpl = self._outtmpl_expandpath(outtmpl)
1367             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1368             if not filename:
1369                 return None
1370
1371             if tmpl_type in ('', 'temp'):
1372                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1373                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1374                     filename = replace_extension(filename, ext, final_ext)
1375             elif tmpl_type:
1376                 force_ext = OUTTMPL_TYPES[tmpl_type]
1377                 if force_ext:
1378                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1379
1380             # https://github.com/blackjack4494/youtube-dlc/issues/85
1381             trim_file_name = self.params.get('trim_file_name', False)
1382             if trim_file_name:
1383                 no_ext, *ext = filename.rsplit('.', 2)
1384                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1385
1386             return filename
1387         except ValueError as err:
1388             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1389             return None
1390
1391     def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1392         """Generate the output filename"""
1393         if outtmpl:
1394             assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1395             dir_type = None
1396         filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1397         if not filename and dir_type not in ('', 'temp'):
1398             return ''
1399
1400         if warn:
1401             if not self.params.get('paths'):
1402                 pass
1403             elif filename == '-':
1404                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1405             elif os.path.isabs(filename):
1406                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1407         if filename == '-' or not filename:
1408             return filename
1409
1410         return self.get_output_path(dir_type, filename)
1411
1412     def _match_entry(self, info_dict, incomplete=False, silent=False):
1413         """Returns None if the file should be downloaded"""
1414         _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
1415         assert incomplete or _type == 'video', 'Only video result can be considered complete'
1416
1417         video_title = info_dict.get('title', info_dict.get('id', 'entry'))
1418
1419         def check_filter():
1420             if _type in ('playlist', 'multi_video'):
1421                 return
1422             elif _type in ('url', 'url_transparent') and not try_call(
1423                     lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1424                 return
1425
1426             if 'title' in info_dict:
1427                 # This can happen when we're just evaluating the playlist
1428                 title = info_dict['title']
1429                 matchtitle = self.params.get('matchtitle', False)
1430                 if matchtitle:
1431                     if not re.search(matchtitle, title, re.IGNORECASE):
1432                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1433                 rejecttitle = self.params.get('rejecttitle', False)
1434                 if rejecttitle:
1435                     if re.search(rejecttitle, title, re.IGNORECASE):
1436                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1437
1438             date = info_dict.get('upload_date')
1439             if date is not None:
1440                 dateRange = self.params.get('daterange', DateRange())
1441                 if date not in dateRange:
1442                     return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1443             view_count = info_dict.get('view_count')
1444             if view_count is not None:
1445                 min_views = self.params.get('min_views')
1446                 if min_views is not None and view_count < min_views:
1447                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1448                 max_views = self.params.get('max_views')
1449                 if max_views is not None and view_count > max_views:
1450                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1451             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1452                 return 'Skipping "%s" because it is age restricted' % video_title
1453
1454             match_filter = self.params.get('match_filter')
1455             if match_filter is None:
1456                 return None
1457
1458             cancelled = None
1459             try:
1460                 try:
1461                     ret = match_filter(info_dict, incomplete=incomplete)
1462                 except TypeError:
1463                     # For backward compatibility
1464                     ret = None if incomplete else match_filter(info_dict)
1465             except DownloadCancelled as err:
1466                 if err.msg is not NO_DEFAULT:
1467                     raise
1468                 ret, cancelled = err.msg, err
1469
1470             if ret is NO_DEFAULT:
1471                 while True:
1472                     filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1473                     reply = input(self._format_screen(
1474                         f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1475                     if reply in {'y', ''}:
1476                         return None
1477                     elif reply == 'n':
1478                         if cancelled:
1479                             raise type(cancelled)(f'Skipping {video_title}')
1480                         return f'Skipping {video_title}'
1481             return ret
1482
1483         if self.in_download_archive(info_dict):
1484             reason = '%s has already been recorded in the archive' % video_title
1485             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1486         else:
1487             try:
1488                 reason = check_filter()
1489             except DownloadCancelled as e:
1490                 reason, break_opt, break_err = e.msg, 'match_filter', type(e)
1491             else:
1492                 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1493         if reason is not None:
1494             if not silent:
1495                 self.to_screen('[download] ' + reason)
1496             if self.params.get(break_opt, False):
1497                 raise break_err()
1498         return reason
1499
1500     @staticmethod
1501     def add_extra_info(info_dict, extra_info):
1502         '''Set the keys from extra_info in info dict if they are missing'''
1503         for key, value in extra_info.items():
1504             info_dict.setdefault(key, value)
1505
1506     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1507                      process=True, force_generic_extractor=False):
1508         """
1509         Extract and return the information dictionary of the URL
1510
1511         Arguments:
1512         @param url          URL to extract
1513
1514         Keyword arguments:
1515         @param download     Whether to download videos
1516         @param process      Whether to resolve all unresolved references (URLs, playlist items).
1517                             Must be True for download to work
1518         @param ie_key       Use only the extractor with this key
1519
1520         @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
1521         @force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')
1522         """
1523
1524         if extra_info is None:
1525             extra_info = {}
1526
1527         if not ie_key and force_generic_extractor:
1528             ie_key = 'Generic'
1529
1530         if ie_key:
1531             ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1532         else:
1533             ies = self._ies
1534
1535         for key, ie in ies.items():
1536             if not ie.suitable(url):
1537                 continue
1538
1539             if not ie.working():
1540                 self.report_warning('The program functionality for this site has been marked as broken, '
1541                                     'and will probably not work.')
1542
1543             temp_id = ie.get_temp_id(url)
1544             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1545                 self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
1546                 if self.params.get('break_on_existing', False):
1547                     raise ExistingVideoReached()
1548                 break
1549             return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1550         else:
1551             extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1552             self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1553                               tb=False if extractors_restricted else None)
1554
1555     def _handle_extraction_exceptions(func):
1556         @functools.wraps(func)
1557         def wrapper(self, *args, **kwargs):
1558             while True:
1559                 try:
1560                     return func(self, *args, **kwargs)
1561                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1562                     raise
1563                 except ReExtractInfo as e:
1564                     if e.expected:
1565                         self.to_screen(f'{e}; Re-extracting data')
1566                     else:
1567                         self.to_stderr('\r')
1568                         self.report_warning(f'{e}; Re-extracting data')
1569                     continue
1570                 except GeoRestrictedError as e:
1571                     msg = e.msg
1572                     if e.countries:
1573                         msg += '\nThis video is available in %s.' % ', '.join(
1574                             map(ISO3166Utils.short2full, e.countries))
1575                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1576                     self.report_error(msg)
1577                 except ExtractorError as e:  # An error we somewhat expected
1578                     self.report_error(str(e), e.format_traceback())
1579                 except Exception as e:
1580                     if self.params.get('ignoreerrors'):
1581                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1582                     else:
1583                         raise
1584                 break
1585         return wrapper
1586
1587     def _wait_for_video(self, ie_result={}):
1588         if (not self.params.get('wait_for_video')
1589                 or ie_result.get('_type', 'video') != 'video'
1590                 or ie_result.get('formats') or ie_result.get('url')):
1591             return
1592
1593         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1594         last_msg = ''
1595
1596         def progress(msg):
1597             nonlocal last_msg
1598             full_msg = f'{msg}\n'
1599             if not self.params.get('noprogress'):
1600                 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1601             elif last_msg:
1602                 return
1603             self.to_screen(full_msg, skip_eol=True)
1604             last_msg = msg
1605
1606         min_wait, max_wait = self.params.get('wait_for_video')
1607         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1608         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1609             diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1610             self.report_warning('Release time of video is not known')
1611         elif ie_result and (diff or 0) <= 0:
1612             self.report_warning('Video should already be available according to extracted info')
1613         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1614         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1615
1616         wait_till = time.time() + diff
1617         try:
1618             while True:
1619                 diff = wait_till - time.time()
1620                 if diff <= 0:
1621                     progress('')
1622                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1623                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1624                 time.sleep(1)
1625         except KeyboardInterrupt:
1626             progress('')
1627             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1628         except BaseException as e:
1629             if not isinstance(e, ReExtractInfo):
1630                 self.to_screen('')
1631             raise
1632
1633     def _load_cookies(self, data, *, from_headers=True):
1634         """Loads cookies from a `Cookie` header
1635
1636         This tries to work around the security vulnerability of passing cookies to every domain.
1637         See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1638         The unscoped cookies are saved for later to be stored in the jar with a limited scope.
1639
1640         @param data         The Cookie header as string to load the cookies from
1641         @param from_headers If `False`, allows Set-Cookie syntax in the cookie string (at least a domain will be required)
1642         """
1643         for cookie in LenientSimpleCookie(data).values():
1644             if from_headers and any(cookie.values()):
1645                 raise ValueError('Invalid syntax in Cookie Header')
1646
1647             domain = cookie.get('domain') or ''
1648             expiry = cookie.get('expires')
1649             if expiry == '':  # 0 is valid
1650                 expiry = None
1651             prepared_cookie = http.cookiejar.Cookie(
1652                 cookie.get('version') or 0, cookie.key, cookie.value, None, False,
1653                 domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
1654                 cookie.get('secure') or False, expiry, False, None, None, {})
1655
1656             if domain:
1657                 self.cookiejar.set_cookie(prepared_cookie)
1658             elif from_headers:
1659                 self.deprecated_feature(
1660                     'Passing cookies as a header is a potential security risk; '
1661                     'they will be scoped to the domain of the downloaded urls. '
1662                     'Please consider loading cookies from a file or browser instead.')
1663                 self.__header_cookies.append(prepared_cookie)
1664             else:
1665                 self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1666                                   tb=False, is_error=False)
1667
1668     def _apply_header_cookies(self, url):
1669         """Applies stray header cookies to the provided url
1670
1671         This loads header cookies and scopes them to the domain provided in `url`.
1672         While this is not ideal, it helps reduce the risk of them being sent
1673         to an unintended destination while mostly maintaining compatibility.
1674         """
1675         parsed = urllib.parse.urlparse(url)
1676         if not parsed.hostname:
1677             return
1678
1679         for cookie in map(copy.copy, self.__header_cookies):
1680             cookie.domain = f'.{parsed.hostname}'
1681             self.cookiejar.set_cookie(cookie)
1682
1683     @_handle_extraction_exceptions
1684     def __extract_info(self, url, ie, download, extra_info, process):
1685         self._apply_header_cookies(url)
1686
1687         try:
1688             ie_result = ie.extract(url)
1689         except UserNotLive as e:
1690             if process:
1691                 if self.params.get('wait_for_video'):
1692                     self.report_warning(e)
1693                 self._wait_for_video()
1694             raise
1695         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1696             self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1697             return
1698         if isinstance(ie_result, list):
1699             # Backwards compatibility: old IE result format
1700             ie_result = {
1701                 '_type': 'compat_list',
1702                 'entries': ie_result,
1703             }
1704         if extra_info.get('original_url'):
1705             ie_result.setdefault('original_url', extra_info['original_url'])
1706         self.add_default_extra_info(ie_result, ie, url)
1707         if process:
1708             self._wait_for_video(ie_result)
1709             return self.process_ie_result(ie_result, download, extra_info)
1710         else:
1711             return ie_result
1712
1713     def add_default_extra_info(self, ie_result, ie, url):
1714         if url is not None:
1715             self.add_extra_info(ie_result, {
1716                 'webpage_url': url,
1717                 'original_url': url,
1718             })
1719         webpage_url = ie_result.get('webpage_url')
1720         if webpage_url:
1721             self.add_extra_info(ie_result, {
1722                 'webpage_url_basename': url_basename(webpage_url),
1723                 'webpage_url_domain': get_domain(webpage_url),
1724             })
1725         if ie is not None:
1726             self.add_extra_info(ie_result, {
1727                 'extractor': ie.IE_NAME,
1728                 'extractor_key': ie.ie_key(),
1729             })
1730
1731     def process_ie_result(self, ie_result, download=True, extra_info=None):
1732         """
1733         Take the result of the ie(may be modified) and resolve all unresolved
1734         references (URLs, playlist items).
1735
1736         It will also download the videos if 'download'.
1737         Returns the resolved ie_result.
1738         """
1739         if extra_info is None:
1740             extra_info = {}
1741         result_type = ie_result.get('_type', 'video')
1742
1743         if result_type in ('url', 'url_transparent'):
1744             ie_result['url'] = sanitize_url(
1745                 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1746             if ie_result.get('original_url') and not extra_info.get('original_url'):
1747                 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1748
1749             extract_flat = self.params.get('extract_flat', False)
1750             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1751                     or extract_flat is True):
1752                 info_copy = ie_result.copy()
1753                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1754                 if ie and not ie_result.get('id'):
1755                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1756                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1757                 self.add_extra_info(info_copy, extra_info)
1758                 info_copy, _ = self.pre_process(info_copy)
1759                 self._fill_common_fields(info_copy, False)
1760                 self.__forced_printings(info_copy)
1761                 self._raise_pending_errors(info_copy)
1762                 if self.params.get('force_write_download_archive', False):
1763                     self.record_download_archive(info_copy)
1764                 return ie_result
1765
1766         if result_type == 'video':
1767             self.add_extra_info(ie_result, extra_info)
1768             ie_result = self.process_video_result(ie_result, download=download)
1769             self._raise_pending_errors(ie_result)
1770             additional_urls = (ie_result or {}).get('additional_urls')
1771             if additional_urls:
1772                 # TODO: Improve MetadataParserPP to allow setting a list
1773                 if isinstance(additional_urls, str):
1774                     additional_urls = [additional_urls]
1775                 self.to_screen(
1776                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1777                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1778                 ie_result['additional_entries'] = [
1779                     self.extract_info(
1780                         url, download, extra_info=extra_info,
1781                         force_generic_extractor=self.params.get('force_generic_extractor'))
1782                     for url in additional_urls
1783                 ]
1784             return ie_result
1785         elif result_type == 'url':
1786             # We have to add extra_info to the results because it may be
1787             # contained in a playlist
1788             return self.extract_info(
1789                 ie_result['url'], download,
1790                 ie_key=ie_result.get('ie_key'),
1791                 extra_info=extra_info)
1792         elif result_type == 'url_transparent':
1793             # Use the information from the embedding page
1794             info = self.extract_info(
1795                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1796                 extra_info=extra_info, download=False, process=False)
1797
1798             # extract_info may return None when ignoreerrors is enabled and
1799             # extraction failed with an error, don't crash and return early
1800             # in this case
1801             if not info:
1802                 return info
1803
1804             exempted_fields = {'_type', 'url', 'ie_key'}
1805             if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1806                 # For video clips, the id etc of the clip extractor should be used
1807                 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1808
1809             new_result = info.copy()
1810             new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1811
1812             # Extracted info may not be a video result (i.e.
1813             # info.get('_type', 'video') != video) but rather an url or
1814             # url_transparent. In such cases outer metadata (from ie_result)
1815             # should be propagated to inner one (info). For this to happen
1816             # _type of info should be overridden with url_transparent. This
1817             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1818             if new_result.get('_type') == 'url':
1819                 new_result['_type'] = 'url_transparent'
1820
1821             return self.process_ie_result(
1822                 new_result, download=download, extra_info=extra_info)
1823         elif result_type in ('playlist', 'multi_video'):
1824             # Protect from infinite recursion due to recursively nested playlists
1825             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1826             webpage_url = ie_result.get('webpage_url')  # Playlists maynot have webpage_url
1827             if webpage_url and webpage_url in self._playlist_urls:
1828                 self.to_screen(
1829                     '[download] Skipping already downloaded playlist: %s'
1830                     % ie_result.get('title') or ie_result.get('id'))
1831                 return
1832
1833             self._playlist_level += 1
1834             self._playlist_urls.add(webpage_url)
1835             self._fill_common_fields(ie_result, False)
1836             self._sanitize_thumbnails(ie_result)
1837             try:
1838                 return self.__process_playlist(ie_result, download)
1839             finally:
1840                 self._playlist_level -= 1
1841                 if not self._playlist_level:
1842                     self._playlist_urls.clear()
1843         elif result_type == 'compat_list':
1844             self.report_warning(
1845                 'Extractor %s returned a compat_list result. '
1846                 'It needs to be updated.' % ie_result.get('extractor'))
1847
1848             def _fixup(r):
1849                 self.add_extra_info(r, {
1850                     'extractor': ie_result['extractor'],
1851                     'webpage_url': ie_result['webpage_url'],
1852                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1853                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1854                     'extractor_key': ie_result['extractor_key'],
1855                 })
1856                 return r
1857             ie_result['entries'] = [
1858                 self.process_ie_result(_fixup(r), download, extra_info)
1859                 for r in ie_result['entries']
1860             ]
1861             return ie_result
1862         else:
1863             raise Exception('Invalid result type: %s' % result_type)
1864
1865     def _ensure_dir_exists(self, path):
1866         return make_dir(path, self.report_error)
1867
1868     @staticmethod
1869     def _playlist_infodict(ie_result, strict=False, **kwargs):
1870         info = {
1871             'playlist_count': ie_result.get('playlist_count'),
1872             'playlist': ie_result.get('title') or ie_result.get('id'),
1873             'playlist_id': ie_result.get('id'),
1874             'playlist_title': ie_result.get('title'),
1875             'playlist_uploader': ie_result.get('uploader'),
1876             'playlist_uploader_id': ie_result.get('uploader_id'),
1877             **kwargs,
1878         }
1879         if strict:
1880             return info
1881         if ie_result.get('webpage_url'):
1882             info.update({
1883                 'webpage_url': ie_result['webpage_url'],
1884                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1885                 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1886             })
1887         return {
1888             **info,
1889             'playlist_index': 0,
1890             '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1891             'extractor': ie_result['extractor'],
1892             'extractor_key': ie_result['extractor_key'],
1893         }
1894
    def __process_playlist(self, ie_result, download):
        """Process each entry in the playlist

        @param ie_result  Info dict whose '_type' is 'playlist' or 'multi_video'.
                          It is modified in place: 'entries', 'requested_entries'
                          and (when unset) 'playlist_count' are rewritten
        @param download   Passed on unchanged to the processing of every entry
        @returns          The result of running the 'playlist' postprocessors on
                          ie_result, or None when the playlist is rejected by
                          _match_entry or when _write_info_json/_write_description
                          return None
        """
        assert ie_result['_type'] in ('playlist', 'multi_video')

        # strict=True: only the playlist_* fields, without webpage_url/extractor data
        common_info = self._playlist_infodict(ie_result, strict=True)
        title = common_info.get('playlist') or '<Untitled>'
        # A non-None result from _match_entry causes the whole playlist to be skipped
        if self._match_entry(common_info, incomplete=True) is not None:
            return
        self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

        all_entries = PlaylistEntries(self, ie_result)
        entries = orderedSet(all_entries.get_requested_items(), lazy=True)

        lazy = self.params.get('lazy_playlist')
        if lazy:
            # Entries are consumed one at a time in the loop below, so their
            # number is not known yet; resolved_entries is filled as we go
            resolved_entries, n_entries = [], 'N/A'
            ie_result['requested_entries'], ie_result['entries'] = None, None
        else:
            # Every item is an (index, entry) pair; zip(*...) splits the pairs
            # into two parallel sequences, with ([], []) covering the empty case
            entries = resolved_entries = list(entries)
            n_entries = len(resolved_entries)
            ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
        if not ie_result.get('playlist_count'):
            # Better to do this after potentially exhausting entries
            ie_result['playlist_count'] = all_entries.get_full_count()

        # NOTE(review): int_or_none presumably maps the 'N/A' placeholder to None - confirm
        extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
        # Lookups hit ie_result first and fall back to the fields in `extra`
        ie_copy = collections.ChainMap(ie_result, extra)

        # Stays False if no playlist files are written; otherwise holds the
        # return value of _write_info_json (None aborts, True triggers the
        # rewrite of the infojson after all entries have been processed)
        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        # Reordering needs the fully resolved list, so it is only done when not lazy.
        # `entries` and `resolved_entries` are the same list object in that case
        if lazy:
            if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
                self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
        elif self.params.get('playlistreverse'):
            entries.reverse()
        elif self.params.get('playlistrandom'):
            random.shuffle(entries)

        self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
                       f'{format_field(ie_result, "playlist_count", " of %s")}')

        # Processed results are stored back into resolved_entries unless
        # extract_flat asks for them to be discarded
        keep_resolved_entries = self.params.get('extract_flat') != 'discard'
        if self.params.get('extract_flat') == 'discard_in_playlist':
            keep_resolved_entries = ie_result['_type'] != 'playlist'
        if keep_resolved_entries:
            self.write_debug('The information of all playlist entries will be held in memory')

        failures = 0
        # No limit configured => never reach the threshold
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, (playlist_index, entry) in enumerate(entries):
            if lazy:
                # Keep resolved_entries[i] valid for the assignments further down
                resolved_entries.append((playlist_index, entry))
            if not entry:
                continue

            entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
            # 'requested_entries' was built before any reverse/shuffle, so with this
            # compat option the index follows the originally requested order
            if not lazy and 'playlist-index' in self.params['compat_opts']:
                playlist_index = ie_result['requested_entries'][i]

            # Read-only view of the entry enriched with playlist fields, used for matching only
            entry_copy = collections.ChainMap(entry, {
                **common_info,
                'n_entries': int_or_none(n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            })

            if self._match_entry(entry_copy, incomplete=True) is not None:
                # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
                # NO_DEFAULT marks the entry so that it is left out of the final lists below
                resolved_entries[i] = (playlist_index, NO_DEFAULT)
                continue

            self.to_screen('[download] Downloading item %s of %s' % (
                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

            entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            }, extra))
            # A falsy result counts as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
                break
            if keep_resolved_entries:
                resolved_entries[i] = (playlist_index, entry_result)

        # Update with processed data
        ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
        ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
        # try_call guards the range() construction, e.g. when playlist_count is not a number
        if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
            # Do not set for full playlist
            ie_result.pop('requested_entries')

        # Write the updated info to json
        if _infojson_written is True and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {title}')
        return ie_result
2011
2012     @_handle_extraction_exceptions
2013     def __process_iterable_entry(self, entry, download, extra_info):
2014         return self.process_ie_result(
2015             entry, download=download, extra_info=extra_info)
2016
2017     def _build_format_filter(self, filter_spec):
2018         " Returns a function to filter the formats according to the filter_spec "
2019
2020         OPERATORS = {
2021             '<': operator.lt,
2022             '<=': operator.le,
2023             '>': operator.gt,
2024             '>=': operator.ge,
2025             '=': operator.eq,
2026             '!=': operator.ne,
2027         }
2028         operator_rex = re.compile(r'''(?x)\s*
2029             (?P<key>[\w.-]+)\s*
2030             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
2031             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2032             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
2033         m = operator_rex.fullmatch(filter_spec)
2034         if m:
2035             try:
2036                 comparison_value = int(m.group('value'))
2037             except ValueError:
2038                 comparison_value = parse_filesize(m.group('value'))
2039                 if comparison_value is None:
2040                     comparison_value = parse_filesize(m.group('value') + 'B')
2041                 if comparison_value is None:
2042                     raise ValueError(
2043                         'Invalid value %r in format specification %r' % (
2044                             m.group('value'), filter_spec))
2045             op = OPERATORS[m.group('op')]
2046
2047         if not m:
2048             STR_OPERATORS = {
2049                 '=': operator.eq,
2050                 '^=': lambda attr, value: attr.startswith(value),
2051                 '$=': lambda attr, value: attr.endswith(value),
2052                 '*=': lambda attr, value: value in attr,
2053                 '~=': lambda attr, value: value.search(attr) is not None
2054             }
2055             str_operator_rex = re.compile(r'''(?x)\s*
2056                 (?P<key>[a-zA-Z0-9._-]+)\s*
2057                 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
2058                 (?P<quote>["'])?
2059                 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2060                 (?(quote)(?P=quote))\s*
2061                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
2062             m = str_operator_rex.fullmatch(filter_spec)
2063             if m:
2064                 if m.group('op') == '~=':
2065                     comparison_value = re.compile(m.group('value'))
2066                 else:
2067                     comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2068                 str_op = STR_OPERATORS[m.group('op')]
2069                 if m.group('negation'):
2070                     op = lambda attr, value: not str_op(attr, value)
2071                 else:
2072                     op = str_op
2073
2074         if not m:
2075             raise SyntaxError('Invalid filter specification %r' % filter_spec)
2076
2077         def _filter(f):
2078             actual_value = f.get(m.group('key'))
2079             if actual_value is None:
2080                 return m.group('none_inclusive')
2081             return op(actual_value, comparison_value)
2082         return _filter
2083
2084     def _check_formats(self, formats):
2085         for f in formats:
2086             self.to_screen('[info] Testing format %s' % f['format_id'])
2087             path = self.get_output_path('temp')
2088             if not self._ensure_dir_exists(f'{path}/'):
2089                 continue
2090             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2091             temp_file.close()
2092             try:
2093                 success, _ = self.dl(temp_file.name, f, test=True)
2094             except (DownloadError, OSError, ValueError) + network_exceptions:
2095                 success = False
2096             finally:
2097                 if os.path.exists(temp_file.name):
2098                     try:
2099                         os.remove(temp_file.name)
2100                     except OSError:
2101                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2102             if success:
2103                 yield f
2104             else:
2105                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2106
2107     def _default_format_spec(self, info_dict, download=True):
2108
2109         def can_merge():
2110             merger = FFmpegMergerPP(self)
2111             return merger.available and merger.can_merge()
2112
2113         prefer_best = (
2114             not self.params.get('simulate')
2115             and download
2116             and (
2117                 not can_merge()
2118                 or info_dict.get('is_live') and not self.params.get('live_from_start')
2119                 or self.params['outtmpl']['default'] == '-'))
2120         compat = (
2121             prefer_best
2122             or self.params.get('allow_multiple_audio_streams', False)
2123             or 'format-spec' in self.params['compat_opts'])
2124
2125         return (
2126             'best/bestvideo+bestaudio' if prefer_best
2127             else 'bestvideo*+bestaudio/best' if not compat
2128             else 'bestvideo+bestaudio/best')
2129
2130     def build_format_selector(self, format_spec):
2131         def syntax_error(note, start):
2132             message = (
2133                 'Invalid format specification: '
2134                 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2135             return SyntaxError(message)
2136
2137         PICKFIRST = 'PICKFIRST'
2138         MERGE = 'MERGE'
2139         SINGLE = 'SINGLE'
2140         GROUP = 'GROUP'
2141         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2142
2143         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2144                                   'video': self.params.get('allow_multiple_video_streams', False)}
2145
2146         def _parse_filter(tokens):
2147             filter_parts = []
2148             for type, string_, start, _, _ in tokens:
2149                 if type == tokenize.OP and string_ == ']':
2150                     return ''.join(filter_parts)
2151                 else:
2152                     filter_parts.append(string_)
2153
2154         def _remove_unused_ops(tokens):
2155             # Remove operators that we don't use and join them with the surrounding strings.
2156             # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2157             ALLOWED_OPS = ('/', '+', ',', '(', ')')
2158             last_string, last_start, last_end, last_line = None, None, None, None
2159             for type, string_, start, end, line in tokens:
2160                 if type == tokenize.OP and string_ == '[':
2161                     if last_string:
2162                         yield tokenize.NAME, last_string, last_start, last_end, last_line
2163                         last_string = None
2164                     yield type, string_, start, end, line
2165                     # everything inside brackets will be handled by _parse_filter
2166                     for type, string_, start, end, line in tokens:
2167                         yield type, string_, start, end, line
2168                         if type == tokenize.OP and string_ == ']':
2169                             break
2170                 elif type == tokenize.OP and string_ in ALLOWED_OPS:
2171                     if last_string:
2172                         yield tokenize.NAME, last_string, last_start, last_end, last_line
2173                         last_string = None
2174                     yield type, string_, start, end, line
2175                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2176                     if not last_string:
2177                         last_string = string_
2178                         last_start = start
2179                         last_end = end
2180                     else:
2181                         last_string += string_
2182             if last_string:
2183                 yield tokenize.NAME, last_string, last_start, last_end, last_line
2184
        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            """Recursively parse the token stream into a list of FormatSelector tuples

            The inside_* flags tell a recursive call which construct it is parsing
            the right-hand side of ("+", "/" or "(...)"), i.e. at which operator it
            has to stop and hand control back to its caller.

            NOTE(review): `tokens` must provide restore_last_token() in addition to
            being iterable; that class is not defined in this part of the file.

            @returns  A list of the comma-separated selectors that were parsed
            @raises SyntaxError (built by syntax_error) on malformed specs
            """
            selectors = []
            # The selector being built; operators that follow wrap or extend it
            current_selector = None
            for type, string_, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string_, [])
                elif type == tokenize.OP:
                    if string_ == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string_ in ['/', ',']:
                        # The right-hand side of "+" ends here; the caller handles the operator
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string_ == ',':
                        # The right-hand side of "/" ends here; the caller handles the ","
                        tokens.restore_last_token()
                        break
                    elif string_ == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string_ == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string_ == '[':
                        # A filter without a preceding selector applies to 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string_ == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string_ == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        # An empty list means that nothing followed the "+"
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error(f'Operator not recognized: "{string_}"', start)
                elif type == tokenize.ENDMARKER:
                    break
            # Flush the selector that was still being built when parsing stopped
            if current_selector:
                selectors.append(current_selector)
            return selectors
2242
2243         def _merge(formats_pair):
2244             format_1, format_2 = formats_pair
2245
2246             formats_info = []
2247             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2248             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2249
2250             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2251                 get_no_more = {'video': False, 'audio': False}
2252                 for (i, fmt_info) in enumerate(formats_info):
2253                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2254                         formats_info.pop(i)
2255                         continue
2256                     for aud_vid in ['audio', 'video']:
2257                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2258                             if get_no_more[aud_vid]:
2259                                 formats_info.pop(i)
2260                                 break
2261                             get_no_more[aud_vid] = True
2262
2263             if len(formats_info) == 1:
2264                 return formats_info[0]
2265
2266             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2267             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2268
2269             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2270             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2271
2272             output_ext = get_compatible_ext(
2273                 vcodecs=[f.get('vcodec') for f in video_fmts],
2274                 acodecs=[f.get('acodec') for f in audio_fmts],
2275                 vexts=[f['ext'] for f in video_fmts],
2276                 aexts=[f['ext'] for f in audio_fmts],
2277                 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2278                              or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2279
2280             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2281
2282             new_dict = {
2283                 'requested_formats': formats_info,
2284                 'format': '+'.join(filtered('format')),
2285                 'format_id': '+'.join(filtered('format_id')),
2286                 'ext': output_ext,
2287                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2288                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2289                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2290                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2291                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2292             }
2293
2294             if the_only_video:
2295                 new_dict.update({
2296                     'width': the_only_video.get('width'),
2297                     'height': the_only_video.get('height'),
2298                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2299                     'fps': the_only_video.get('fps'),
2300                     'dynamic_range': the_only_video.get('dynamic_range'),
2301                     'vcodec': the_only_video.get('vcodec'),
2302                     'vbr': the_only_video.get('vbr'),
2303                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2304                     'aspect_ratio': the_only_video.get('aspect_ratio'),
2305                 })
2306
2307             if the_only_audio:
2308                 new_dict.update({
2309                     'acodec': the_only_audio.get('acodec'),
2310                     'abr': the_only_audio.get('abr'),
2311                     'asr': the_only_audio.get('asr'),
2312                     'audio_channels': the_only_audio.get('audio_channels')
2313                 })
2314
2315             return new_dict
2316
2317         def _check_formats(formats):
2318             if (self.params.get('check_formats') is not None
2319                     or self.params.get('allow_unplayable_formats')):
2320                 yield from formats
2321                 return
2322             elif self.params.get('check_formats') == 'selected':
2323                 yield from self._check_formats(formats)
2324                 return
2325
2326             for f in formats:
2327                 if f.get('has_drm'):
2328                     yield from self._check_formats([f])
2329                 else:
2330                     yield f
2331
2332         def _build_selector_function(selector):
2333             if isinstance(selector, list):  # ,
2334                 fs = [_build_selector_function(s) for s in selector]
2335
2336                 def selector_function(ctx):
2337                     for f in fs:
2338                         yield from f(ctx)
2339                 return selector_function
2340
2341             elif selector.type == GROUP:  # ()
2342                 selector_function = _build_selector_function(selector.selector)
2343
2344             elif selector.type == PICKFIRST:  # /
2345                 fs = [_build_selector_function(s) for s in selector.selector]
2346
2347                 def selector_function(ctx):
2348                     for f in fs:
2349                         picked_formats = list(f(ctx))
2350                         if picked_formats:
2351                             return picked_formats
2352                     return []
2353
2354             elif selector.type == MERGE:  # +
2355                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2356
2357                 def selector_function(ctx):
2358                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2359                         yield _merge(pair)
2360
2361             elif selector.type == SINGLE:  # atom
2362                 format_spec = selector.selector or 'best'
2363
2364                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2365                 if format_spec == 'all':
2366                     def selector_function(ctx):
2367                         yield from _check_formats(ctx['formats'][::-1])
2368                 elif format_spec == 'mergeall':
2369                     def selector_function(ctx):
2370                         formats = list(_check_formats(
2371                             f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2372                         if not formats:
2373                             return
2374                         merged_format = formats[-1]
2375                         for f in formats[-2::-1]:
2376                             merged_format = _merge((merged_format, f))
2377                         yield merged_format
2378
2379                 else:
2380                     format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
2381                     mobj = re.match(
2382                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2383                         format_spec)
2384                     if mobj is not None:
2385                         format_idx = int_or_none(mobj.group('n'), default=1)
2386                         format_reverse = mobj.group('bw')[0] == 'b'
2387                         format_type = (mobj.group('type') or [None])[0]
2388                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2389                         format_modified = mobj.group('mod') is not None
2390
2391                         format_fallback = not format_type and not format_modified  # for b, w
2392                         _filter_f = (
2393                             (lambda f: f.get('%scodec' % format_type) != 'none')
2394                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2395                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2396                             if format_type  # bv, ba, wv, wa
2397                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2398                             if not format_modified  # b, w
2399                             else lambda f: True)  # b*, w*
2400                         filter_f = lambda f: _filter_f(f) and (
2401                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2402                     else:
2403                         if format_spec in self._format_selection_exts['audio']:
2404                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2405                         elif format_spec in self._format_selection_exts['video']:
2406                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2407                             seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2408                         elif format_spec in self._format_selection_exts['storyboards']:
2409                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2410                         else:
2411                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2412
2413                     def selector_function(ctx):
2414                         formats = list(ctx['formats'])
2415                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2416                         if not matches:
2417                             if format_fallback and ctx['incomplete_formats']:
2418                                 # for extractors with incomplete formats (audio only (soundcloud)
2419                                 # or video only (imgur)) best/worst will fallback to
2420                                 # best/worst {video,audio}-only format
2421                                 matches = formats
2422                             elif seperate_fallback and not ctx['has_merged_format']:
2423                                 # for compatibility with youtube-dl when there is no pre-merged format
2424                                 matches = list(filter(seperate_fallback, formats))
2425                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2426                         try:
2427                             yield matches[format_idx - 1]
2428                         except LazyList.IndexError:
2429                             return
2430
2431             filters = [self._build_format_filter(f) for f in selector.filters]
2432
2433             def final_selector(ctx):
2434                 ctx_copy = dict(ctx)
2435                 for _filter in filters:
2436                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2437                 return selector_function(ctx_copy)
2438             return final_selector
2439
2440         stream = io.BytesIO(format_spec.encode())
2441         try:
2442             tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2443         except tokenize.TokenError:
2444             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2445
2446         class TokenIterator:
2447             def __init__(self, tokens):
2448                 self.tokens = tokens
2449                 self.counter = 0
2450
2451             def __iter__(self):
2452                 return self
2453
2454             def __next__(self):
2455                 if self.counter >= len(self.tokens):
2456                     raise StopIteration()
2457                 value = self.tokens[self.counter]
2458                 self.counter += 1
2459                 return value
2460
2461             next = __next__
2462
2463             def restore_last_token(self):
2464                 self.counter -= 1
2465
2466         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2467         return _build_selector_function(parsed_selector)
2468
2469     def _calc_headers(self, info_dict):
2470         res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2471         if 'Youtubedl-No-Compression' in res:  # deprecated
2472             res.pop('Youtubedl-No-Compression', None)
2473             res['Accept-Encoding'] = 'identity'
2474         cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
2475         if cookies:
2476             encoder = LenientSimpleCookie()
2477             values = []
2478             for cookie in cookies:
2479                 _, value = encoder.value_encode(cookie.value)
2480                 values.append(f'{cookie.name}={value}')
2481                 if cookie.domain:
2482                     values.append(f'Domain={cookie.domain}')
2483                 if cookie.path:
2484                     values.append(f'Path={cookie.path}')
2485                 if cookie.secure:
2486                     values.append('Secure')
2487                 if cookie.expires:
2488                     values.append(f'Expires={cookie.expires}')
2489                 if cookie.version:
2490                     values.append(f'Version={cookie.version}')
2491             info_dict['cookies'] = '; '.join(values)
2492
2493         if 'X-Forwarded-For' not in res:
2494             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2495             if x_forwarded_for_ip:
2496                 res['X-Forwarded-For'] = x_forwarded_for_ip
2497
2498         return res
2499
2500     def _calc_cookies(self, url):
2501         self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2502         return self.cookiejar.get_cookie_header(url)
2503
2504     def _sort_thumbnails(self, thumbnails):
2505         thumbnails.sort(key=lambda t: (
2506             t.get('preference') if t.get('preference') is not None else -1,
2507             t.get('width') if t.get('width') is not None else -1,
2508             t.get('height') if t.get('height') is not None else -1,
2509             t.get('id') if t.get('id') is not None else '',
2510             t.get('url')))
2511
2512     def _sanitize_thumbnails(self, info_dict):
2513         thumbnails = info_dict.get('thumbnails')
2514         if thumbnails is None:
2515             thumbnail = info_dict.get('thumbnail')
2516             if thumbnail:
2517                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2518         if not thumbnails:
2519             return
2520
2521         def check_thumbnails(thumbnails):
2522             for t in thumbnails:
2523                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2524                 try:
2525                     self.urlopen(HEADRequest(t['url']))
2526                 except network_exceptions as err:
2527                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2528                     continue
2529                 yield t
2530
2531         self._sort_thumbnails(thumbnails)
2532         for i, t in enumerate(thumbnails):
2533             if t.get('id') is None:
2534                 t['id'] = '%d' % i
2535             if t.get('width') and t.get('height'):
2536                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2537             t['url'] = sanitize_url(t['url'])
2538
2539         if self.params.get('check_formats') is True:
2540             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2541         else:
2542             info_dict['thumbnails'] = thumbnails
2543
2544     def _fill_common_fields(self, info_dict, final=True):
2545         # TODO: move sanitization here
2546         if final:
2547             title = info_dict['fulltitle'] = info_dict.get('title')
2548             if not title:
2549                 if title == '':
2550                     self.write_debug('Extractor gave empty title. Creating a generic title')
2551                 else:
2552                     self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2553                 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2554
2555         if info_dict.get('duration') is not None:
2556             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2557
2558         for ts_key, date_key in (
2559                 ('timestamp', 'upload_date'),
2560                 ('release_timestamp', 'release_date'),
2561                 ('modified_timestamp', 'modified_date'),
2562         ):
2563             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2564                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2565                 # see http://bugs.python.org/issue1646728)
2566                 with contextlib.suppress(ValueError, OverflowError, OSError):
2567                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2568                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2569
2570         live_keys = ('is_live', 'was_live')
2571         live_status = info_dict.get('live_status')
2572         if live_status is None:
2573             for key in live_keys:
2574                 if info_dict.get(key) is False:
2575                     continue
2576                 if info_dict.get(key):
2577                     live_status = key
2578                 break
2579             if all(info_dict.get(key) is False for key in live_keys):
2580                 live_status = 'not_live'
2581         if live_status:
2582             info_dict['live_status'] = live_status
2583             for key in live_keys:
2584                 if info_dict.get(key) is None:
2585                     info_dict[key] = (live_status == key)
2586         if live_status == 'post_live':
2587             info_dict['was_live'] = True
2588
2589         # Auto generate title fields corresponding to the *_number fields when missing
2590         # in order to always have clean titles. This is very common for TV series.
2591         for field in ('chapter', 'season', 'episode'):
2592             if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2593                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2594
2595     def _raise_pending_errors(self, info):
2596         err = info.pop('__pending_error', None)
2597         if err:
2598             self.report_error(err, tb=False)
2599
2600     def sort_formats(self, info_dict):
2601         formats = self._get_formats(info_dict)
2602         formats.sort(key=FormatSorter(
2603             self, info_dict.get('_format_sort_fields') or []).calculate_preference)
2604
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single video result, select its formats and optionally download them.

        The steps, in order:
          1. validate "id" and coerce wrongly typed fields (with a warning)
          2. normalize duration, chapters, playlist fields, thumbnails and subtitles
          3. filter, complete and sort the formats and make every ``format_id`` unique
          4. run the pre-processors and the entry filter
          5. handle the listing options (thumbnails, subtitles, formats)
          6. evaluate the format selector (interactively when it is '-')
          7. when ``download`` is true, call ``process_info`` once per selected
             format and requested time range and run the 'after_video' post-processors

        ``info_dict`` is mutated in place. The returned dict is the (possibly
        pre-processed) info dict updated with the last selected format.

        Raises ExtractorError when "id" is missing or empty, or when no format
        matches the selector and ``ignore_no_formats_error`` is not set.
        MaxDownloadsReached is re-raised after the remaining bookkeeping is done.
        """
        assert info_dict.get('_type', 'video') == 'video'
        self._num_videos += 1

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
        elif not info_dict.get('id'):
            raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

        # Warn that a field has the wrong type and is being coerced
        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        # Coerce info[string_field] to str unless it is already a str or None
        def sanitize_string_field(info, string_field):
            field = info.get(string_field)
            if field is None or isinstance(field, str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = str(field)

        # Coerce every known numeric field that is neither int, float nor None
        def sanitize_numeric_fields(info):
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, (int, float)):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)
        # For a section, the duration is the length of the section itself
        if info_dict.get('section_end') and info_dict.get('section_start') is not None:
            info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
        # Drop non-positive durations. pop() returns the removed value, so the
        # warning is only emitted for negative values (0 and None are falsy)
        if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
            self.report_warning('"duration" field is negative, there is an error in extractor')

        chapters = info_dict.get('chapters') or []
        # If the first chapter does not start at 0, prepend one that does
        if chapters and chapters[0].get('start_time'):
            chapters.insert(0, {'start_time': 0})

        # Sentinel neighbour: acts as predecessor of the first chapter (end_time 0)
        # and as successor of the last one (start_time = duration of the video)
        dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
        for idx, (prev, current, next_) in enumerate(zip(
                (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
            # Missing boundaries are taken from the adjacent chapters
            if current.get('start_time') is None:
                current['start_time'] = prev.get('end_time')
            if not current.get('end_time'):
                current['end_time'] = next_.get('start_time')
            if not current.get('title'):
                current['title'] = f'<Untitled Chapter {idx}>'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # No explicit thumbnail: use the last entry of the sanitized list
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        self._fill_common_fields(info_dict)

        # Sanitize subtitle URLs and derive a missing "ext" from the URL
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        formats = self._get_formats(info_dict)

        # Backward compatibility with InfoExtractor._sort_formats
        field_preference = (formats or [{}])[0].pop('__sort_fields', None)
        if field_preference:
            info_dict['_format_sort_fields'] = field_preference

        info_dict['_has_drm'] = any(  # or None ensures --clean-infojson removes it
            f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
        # Formats known to have DRM are dropped unless explicitly allowed; 'maybe' is kept
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']

        # Formats having neither audio nor video are treated as images
        if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
            self.report_warning(
                f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
                'only images are available for download. Use --list-formats to see them'.capitalize())

        get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
        if not get_from_start:
            # A live stream recorded from the current time gets the current local time appended to its title
            info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        if info_dict.get('is_live') and formats:
            # Keep only the formats matching the requested starting point
            formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
            if get_from_start and not formats:
                self.raise_no_formats(info_dict, msg=(
                    '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                    'If you want to download from the current time, use --no-live-from-start'))

        # A format is usable only if it has a non-empty URL; bytes URLs are converted to str
        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats or []))

        if not formats:
            self.raise_no_formats(info_dict)

        # Fill in the per-format fields that can be derived when the extractor did not set them
        for format in formats:
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            if format.get('aspect_ratio') is None:
                format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
            if (not format.get('manifest_url')  # For fragmented formats, "tbr" is often max bitrate and not average
                    and info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
            # The ChainMap makes format values take precedence over info_dict values
            # for lookups; anything _calc_headers writes ends up in the format dict
            format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))

        # This is copied to http_headers by the above _calc_headers and can now be removed
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        self.sort_formats({
            'formats': formats,
            '_format_sort_fields': info_dict.get('_format_sort_fields')
        })

        # Sanitize and group by format_id
        formats_dict = {}
        for i, format in enumerate(formats):
            if not format.get('format_id'):
                format['format_id'] = str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            formats_dict.setdefault(format['format_id'], []).append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    # Disambiguate by appending the position within the group
                    format['format_id'] = '%s-%d' % (format_id, i)
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

                if format.get('format') is None:
                    format['format'] = '{id} - {res}{note}'.format(
                        id=format['format_id'],
                        res=self.format_resolution(format),
                        note=format_field(format, 'format_note', ' (%s)'),
                    )

        if self.params.get('check_formats') is True:
            # Check lazily, starting from the end of the sorted list; reverse=True restores the order
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # A non-None result from _match_entry means the entry is filtered out
        if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
            return info_dict

        self.post_extract(info_dict)
        info_dict, _ = self.pre_process(info_dict, 'after_filter')

        # The pre-processors may have modified the formats
        formats = self._get_formats(info_dict)

        list_only = self.params.get('simulate') == 'list_only'
        # A format selector of '-' requests an interactive prompt
        interactive_format_selection = not list_only and self.format_selector == '-'
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        if self.params.get('listformats') or interactive_format_selection:
            self.list_formats(info_dict)
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict)
            return info_dict

        format_selector = self.format_selector
        # Runs once, except in interactive mode where it repeats until
        # a valid selector matching at least one format is entered
        while True:
            if interactive_format_selection:
                req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
                                   + '(Press ENTER for default, or Ctrl+C to quit)'
                                   + self._format_screen(': ', self.Styles.EMPHASIS))
                try:
                    # Empty input selects the default format spec below
                    format_selector = self.build_format_selector(req_format) if req_format else None
                except SyntaxError as err:
                    self.report_error(err, tb=False, is_error=False)
                    continue

            if format_selector is None:
                req_format = self._default_format_spec(info_dict, download=download)
                self.write_debug(f'Default format spec: {req_format}')
                format_selector = self.build_format_selector(req_format)

            formats_to_download = list(format_selector({
                'formats': formats,
                'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
                'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
                                       or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
            }))
            if interactive_format_selection and not formats_to_download:
                self.report_error('Requested format is not available', tb=False, is_error=False)
                continue
            break

        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError(
                    'Requested format is not available. Use --list-formats for a list of available formats',
                    expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
            self.report_warning('Requested format is not available')
            # Process what we can, even without any available formats.
            formats_to_download = [{}]

        # Without a 'download_ranges' callback there is a single empty range, i.e. the whole video
        requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
        best_format, downloaded_formats = formats_to_download[-1], []
        if download:
            if best_format and requested_ranges:
                def to_screen(*msg):
                    self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

                to_screen(f'Downloading {len(formats_to_download)} format(s):',
                          (f['format_id'] for f in formats_to_download))
                if requested_ranges != ({}, ):
                    to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                              (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
            max_downloads_reached = False

            # Every selected format is processed once per requested time range
            for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
                new_info = self._copy_infodict(info_dict)
                new_info.update(fmt)
                offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
                end_time = offset + min(chapter.get('end_time', duration), duration)
                # duration may not be accurate. So allow deviations <1sec
                if end_time == float('inf') or end_time > offset + duration + 1:
                    end_time = None
                if chapter or offset:
                    new_info.update({
                        'section_start': offset + chapter.get('start_time', 0),
                        'section_end': end_time,
                        'section_title': chapter.get('title'),
                        'section_number': chapter.get('index'),
                    })
                downloaded_formats.append(new_info)
                try:
                    self.process_info(new_info)
                except MaxDownloadsReached:
                    # Remember it so that the bookkeeping below still runs; re-raised at the end
                    max_downloads_reached = True
                self._raise_pending_errors(new_info)
                # Remove copied info
                for key, val in tuple(new_info.items()):
                    if info_dict.get(key) == val:
                        new_info.pop(key)
                if max_downloads_reached:
                    break

            # Record in the archive only if some download asked for it (True) and none refused (False)
            write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
            assert write_archive.issubset({True, False, 'ignore'})
            if True in write_archive and False not in write_archive:
                self.record_download_archive(info_dict)

            info_dict['requested_downloads'] = downloaded_formats
            info_dict = self.run_all_pps('after_video', info_dict)
            if max_downloads_reached:
                raise MaxDownloadsReached()

        # We update the info dict with the selected best quality format (backwards compatibility)
        info_dict.update(best_format)
        return info_dict
2921
2922     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2923         """Select the requested subtitles and their format"""
2924         available_subs, normal_sub_langs = {}, []
2925         if normal_subtitles and self.params.get('writesubtitles'):
2926             available_subs.update(normal_subtitles)
2927             normal_sub_langs = tuple(normal_subtitles.keys())
2928         if automatic_captions and self.params.get('writeautomaticsub'):
2929             for lang, cap_info in automatic_captions.items():
2930                 if lang not in available_subs:
2931                     available_subs[lang] = cap_info
2932
2933         if not available_subs or (
2934                 not self.params.get('writesubtitles')
2935                 and not self.params.get('writeautomaticsub')):
2936             return None
2937
2938         all_sub_langs = tuple(available_subs.keys())
2939         if self.params.get('allsubtitles', False):
2940             requested_langs = all_sub_langs
2941         elif self.params.get('subtitleslangs', False):
2942             try:
2943                 requested_langs = orderedSet_from_options(
2944                     self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2945             except re.error as e:
2946                 raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}')
2947         else:
2948             requested_langs = LazyList(itertools.chain(
2949                 ['en'] if 'en' in normal_sub_langs else [],
2950                 filter(lambda f: f.startswith('en'), normal_sub_langs),
2951                 ['en'] if 'en' in all_sub_langs else [],
2952                 filter(lambda f: f.startswith('en'), all_sub_langs),
2953                 normal_sub_langs, all_sub_langs,
2954             ))[:1]
2955         if requested_langs:
2956             self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
2957
2958         formats_query = self.params.get('subtitlesformat', 'best')
2959         formats_preference = formats_query.split('/') if formats_query else []
2960         subs = {}
2961         for lang in requested_langs:
2962             formats = available_subs.get(lang)
2963             if formats is None:
2964                 self.report_warning(f'{lang} subtitles not available for {video_id}')
2965                 continue
2966             for ext in formats_preference:
2967                 if ext == 'best':
2968                     f = formats[-1]
2969                     break
2970                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2971                 if matches:
2972                     f = matches[-1]
2973                     break
2974             else:
2975                 f = formats[-1]
2976                 self.report_warning(
2977                     'No subtitle format found matching "%s" for language %s, '
2978                     'using %s' % (formats_query, lang, f['ext']))
2979             subs[lang] = f
2980         return subs
2981
2982     def _forceprint(self, key, info_dict):
2983         if info_dict is None:
2984             return
2985         info_copy = info_dict.copy()
2986         info_copy.setdefault('filename', self.prepare_filename(info_dict))
2987         if info_dict.get('requested_formats') is not None:
2988             # For RTMP URLs, also include the playpath
2989             info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2990         elif info_dict.get('url'):
2991             info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2992         info_copy['formats_table'] = self.render_formats_table(info_dict)
2993         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2994         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2995         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2996
2997         def format_tmpl(tmpl):
2998             mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
2999             if not mobj:
3000                 return tmpl
3001
3002             fmt = '%({})s'
3003             if tmpl.startswith('{'):
3004                 tmpl, fmt = f'.{tmpl}', '%({})j'
3005             if tmpl.endswith('='):
3006                 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
3007             return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
3008
3009         for tmpl in self.params['forceprint'].get(key, []):
3010             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
3011
3012         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
3013             filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
3014             tmpl = format_tmpl(tmpl)
3015             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
3016             if self._ensure_dir_exists(filename):
3017                 with open(filename, 'a', encoding='utf-8', newline='') as f:
3018                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
3019
3020         return info_copy
3021
3022     def __forced_printings(self, info_dict, filename=None, incomplete=True):
3023         if (self.params.get('forcejson')
3024                 or self.params['forceprint'].get('video')
3025                 or self.params['print_to_file'].get('video')):
3026             self.post_extract(info_dict)
3027         if filename:
3028             info_dict['filename'] = filename
3029         info_copy = self._forceprint('video', info_dict)
3030
3031         def print_field(field, actual_field=None, optional=False):
3032             if actual_field is None:
3033                 actual_field = field
3034             if self.params.get(f'force{field}') and (
3035                     info_copy.get(field) is not None or (not optional and not incomplete)):
3036                 self.to_stdout(info_copy[actual_field])
3037
3038         print_field('title')
3039         print_field('id')
3040         print_field('url', 'urls')
3041         print_field('thumbnail', optional=True)
3042         print_field('description', optional=True)
3043         print_field('filename')
3044         if self.params.get('forceduration') and info_copy.get('duration') is not None:
3045             self.to_stdout(formatSeconds(info_copy['duration']))
3046         print_field('format')
3047
3048         if self.params.get('forcejson'):
3049             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
3050
3051     def dl(self, name, info, subtitle=False, test=False):
3052         if not info.get('url'):
3053             self.raise_no_formats(info, True)
3054
3055         if test:
3056             verbose = self.params.get('verbose')
3057             params = {
3058                 'test': True,
3059                 'quiet': self.params.get('quiet') or not verbose,
3060                 'verbose': verbose,
3061                 'noprogress': not verbose,
3062                 'nopart': True,
3063                 'skip_unavailable_fragments': False,
3064                 'keep_fragments': False,
3065                 'overwrites': True,
3066                 '_no_ytdl_file': True,
3067             }
3068         else:
3069             params = self.params
3070         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
3071         if not test:
3072             for ph in self._progress_hooks:
3073                 fd.add_progress_hook(ph)
3074             urls = '", "'.join(
3075                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
3076                 for f in info.get('requested_formats', []) or [info])
3077             self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
3078
3079         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
3080         # But it may contain objects that are not deep-copyable
3081         new_info = self._copy_infodict(info)
3082         if new_info.get('http_headers') is None:
3083             new_info['http_headers'] = self._calc_headers(new_info)
3084         return fd.download(name, new_info, subtitle)
3085
3086     def existing_file(self, filepaths, *, default_overwrite=True):
3087         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
3088         if existing_files and not self.params.get('overwrites', default_overwrite):
3089             return existing_files[0]
3090
3091         for file in existing_files:
3092             self.report_file_delete(file)
3093             os.remove(file)
3094         return None
3095
3096     def process_info(self, info_dict):
3097         """Process a single resolved IE result. (Modifies it in-place)"""
3098
3099         assert info_dict.get('_type', 'video') == 'video'
3100         original_infodict = info_dict
3101
3102         if 'format' not in info_dict and 'ext' in info_dict:
3103             info_dict['format'] = info_dict['ext']
3104
3105         if self._match_entry(info_dict) is not None:
3106             info_dict['__write_download_archive'] = 'ignore'
3107             return
3108
3109         # Does nothing under normal operation - for backward compatibility of process_info
3110         self.post_extract(info_dict)
3111
3112         def replace_info_dict(new_info):
3113             nonlocal info_dict
3114             if new_info == info_dict:
3115                 return
3116             info_dict.clear()
3117             info_dict.update(new_info)
3118
3119         new_info, _ = self.pre_process(info_dict, 'video')
3120         replace_info_dict(new_info)
3121         self._num_downloads += 1
3122
3123         # info_dict['_filename'] needs to be set for backward compatibility
3124         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3125         temp_filename = self.prepare_filename(info_dict, 'temp')
3126         files_to_move = {}
3127
3128         # Forced printings
3129         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
3130
3131         def check_max_downloads():
3132             if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3133                 raise MaxDownloadsReached()
3134
3135         if self.params.get('simulate'):
3136             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3137             check_max_downloads()
3138             return
3139
3140         if full_filename is None:
3141             return
3142         if not self._ensure_dir_exists(encodeFilename(full_filename)):
3143             return
3144         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
3145             return
3146
3147         if self._write_description('video', info_dict,
3148                                    self.prepare_filename(info_dict, 'description')) is None:
3149             return
3150
3151         sub_files = self._write_subtitles(info_dict, temp_filename)
3152         if sub_files is None:
3153             return
3154         files_to_move.update(dict(sub_files))
3155
3156         thumb_files = self._write_thumbnails(
3157             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3158         if thumb_files is None:
3159             return
3160         files_to_move.update(dict(thumb_files))
3161
3162         infofn = self.prepare_filename(info_dict, 'infojson')
3163         _infojson_written = self._write_info_json('video', info_dict, infofn)
3164         if _infojson_written:
3165             info_dict['infojson_filename'] = infofn
3166             # For backward compatibility, even though it was a private field
3167             info_dict['__infojson_filename'] = infofn
3168         elif _infojson_written is None:
3169             return
3170
3171         # Note: Annotations are deprecated
3172         annofn = None
3173         if self.params.get('writeannotations', False):
3174             annofn = self.prepare_filename(info_dict, 'annotation')
3175         if annofn:
3176             if not self._ensure_dir_exists(encodeFilename(annofn)):
3177                 return
3178             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
3179                 self.to_screen('[info] Video annotations are already present')
3180             elif not info_dict.get('annotations'):
3181                 self.report_warning('There are no annotations to write.')
3182             else:
3183                 try:
3184                     self.to_screen('[info] Writing video annotations to: ' + annofn)
3185                     with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
3186                         annofile.write(info_dict['annotations'])
3187                 except (KeyError, TypeError):
3188                     self.report_warning('There are no annotations to write.')
3189                 except OSError:
3190                     self.report_error('Cannot write annotations file: ' + annofn)
3191                     return
3192
3193         # Write internet shortcut files
3194         def _write_link_file(link_type):
3195             url = try_get(info_dict['webpage_url'], iri_to_uri)
3196             if not url:
3197                 self.report_warning(
3198                     f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3199                 return True
3200             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3201             if not self._ensure_dir_exists(encodeFilename(linkfn)):
3202                 return False
3203             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3204                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3205                 return True
3206             try:
3207                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3208                 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3209                           newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3210                     template_vars = {'url': url}
3211                     if link_type == 'desktop':
3212                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3213                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3214             except OSError:
3215                 self.report_error(f'Cannot write internet shortcut {linkfn}')
3216                 return False
3217             return True
3218
3219         write_links = {
3220             'url': self.params.get('writeurllink'),
3221             'webloc': self.params.get('writewebloclink'),
3222             'desktop': self.params.get('writedesktoplink'),
3223         }
3224         if self.params.get('writelink'):
3225             link_type = ('webloc' if sys.platform == 'darwin'
3226                          else 'desktop' if sys.platform.startswith('linux')
3227                          else 'url')
3228             write_links[link_type] = True
3229
3230         if any(should_write and not _write_link_file(link_type)
3231                for link_type, should_write in write_links.items()):
3232             return
3233
3234         new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3235         replace_info_dict(new_info)
3236
3237         if self.params.get('skip_download'):
3238             info_dict['filepath'] = temp_filename
3239             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3240             info_dict['__files_to_move'] = files_to_move
3241             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3242             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3243         else:
3244             # Download
3245             info_dict.setdefault('__postprocessors', [])
3246             try:
3247
3248                 def existing_video_file(*filepaths):
3249                     ext = info_dict.get('ext')
3250                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3251                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3252                                               default_overwrite=False)
3253                     if file:
3254                         info_dict['ext'] = os.path.splitext(file)[1][1:]
3255                     return file
3256
3257                 fd, success = None, True
3258                 if info_dict.get('protocol') or info_dict.get('url'):
3259                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3260                     if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3261                             info_dict.get('section_start') or info_dict.get('section_end')):
3262                         msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3263                                else 'You have requested downloading the video partially, but ffmpeg is not installed')
3264                         self.report_error(f'{msg}. Aborting')
3265                         return
3266
3267                 if info_dict.get('requested_formats') is not None:
3268                     old_ext = info_dict['ext']
3269                     if self.params.get('merge_output_format') is None:
3270                         if (info_dict['ext'] == 'webm'
3271                                 and info_dict.get('thumbnails')
3272                                 # check with type instead of pp_key, __name__, or isinstance
3273                                 # since we dont want any custom PPs to trigger this
3274                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
3275                             info_dict['ext'] = 'mkv'
3276                             self.report_warning(
3277                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3278                     new_ext = info_dict['ext']
3279
3280                     def correct_ext(filename, ext=new_ext):
3281                         if filename == '-':
3282                             return filename
3283                         filename_real_ext = os.path.splitext(filename)[1][1:]
3284                         filename_wo_ext = (
3285                             os.path.splitext(filename)[0]
3286                             if filename_real_ext in (old_ext, new_ext)
3287                             else filename)
3288                         return f'{filename_wo_ext}.{ext}'
3289
3290                     # Ensure filename always has a correct extension for successful merge
3291                     full_filename = correct_ext(full_filename)
3292                     temp_filename = correct_ext(temp_filename)
3293                     dl_filename = existing_video_file(full_filename, temp_filename)
3294
3295                     info_dict['__real_download'] = False
3296                     # NOTE: Copy so that original format dicts are not modified
3297                     info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
3298
3299                     merger = FFmpegMergerPP(self)
3300                     downloaded = []
3301                     if dl_filename is not None:
3302                         self.report_file_already_downloaded(dl_filename)
3303                     elif fd:
3304                         for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
3305                             f['filepath'] = fname = prepend_extension(
3306                                 correct_ext(temp_filename, info_dict['ext']),
3307                                 'f%s' % f['format_id'], info_dict['ext'])
3308                             downloaded.append(fname)
3309                         info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
3310                         success, real_download = self.dl(temp_filename, info_dict)
3311                         info_dict['__real_download'] = real_download
3312                     else:
3313                         if self.params.get('allow_unplayable_formats'):
3314                             self.report_warning(
3315                                 'You have requested merging of multiple formats '
3316                                 'while also allowing unplayable formats to be downloaded. '
3317                                 'The formats won\'t be merged to prevent data corruption.')
3318                         elif not merger.available:
3319                             msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3320                             if not self.params.get('ignoreerrors'):
3321                                 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3322                                 return
3323                             self.report_warning(f'{msg}. The formats won\'t be merged')
3324
3325                         if temp_filename == '-':
3326                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3327                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3328                                       else 'but ffmpeg is not installed')
3329                             self.report_warning(
3330                                 f'You have requested downloading multiple formats to stdout {reason}. '
3331                                 'The formats will be streamed one after the other')
3332                             fname = temp_filename
3333                         for f in info_dict['requested_formats']:
3334                             new_info = dict(info_dict)
3335                             del new_info['requested_formats']
3336                             new_info.update(f)
3337                             if temp_filename != '-':
3338                                 fname = prepend_extension(
3339                                     correct_ext(temp_filename, new_info['ext']),
3340                                     'f%s' % f['format_id'], new_info['ext'])
3341                                 if not self._ensure_dir_exists(fname):
3342                                     return
3343                                 f['filepath'] = fname
3344                                 downloaded.append(fname)
3345                             partial_success, real_download = self.dl(fname, new_info)
3346                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3347                             success = success and partial_success
3348
3349                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3350                         info_dict['__postprocessors'].append(merger)
3351                         info_dict['__files_to_merge'] = downloaded
3352                         # Even if there were no downloads, it is being merged only now
3353                         info_dict['__real_download'] = True
3354                     else:
3355                         for file in downloaded:
3356                             files_to_move[file] = None
3357                 else:
3358                     # Just a single file
3359                     dl_filename = existing_video_file(full_filename, temp_filename)
3360                     if dl_filename is None or dl_filename == temp_filename:
3361                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3362                         # So we should try to resume the download
3363                         success, real_download = self.dl(temp_filename, info_dict)
3364                         info_dict['__real_download'] = real_download
3365                     else:
3366                         self.report_file_already_downloaded(dl_filename)
3367
3368                 dl_filename = dl_filename or temp_filename
3369                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3370
3371             except network_exceptions as err:
3372                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3373                 return
3374             except OSError as err:
3375                 raise UnavailableVideoError(err)
3376             except (ContentTooShortError, ) as err:
3377                 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3378                 return
3379
3380             self._raise_pending_errors(info_dict)
3381             if success and full_filename != '-':
3382
3383                 def fixup():
3384                     do_fixup = True
3385                     fixup_policy = self.params.get('fixup')
3386                     vid = info_dict['id']
3387
3388                     if fixup_policy in ('ignore', 'never'):
3389                         return
3390                     elif fixup_policy == 'warn':
3391                         do_fixup = 'warn'
3392                     elif fixup_policy != 'force':
3393                         assert fixup_policy in ('detect_or_warn', None)
3394                         if not info_dict.get('__real_download'):
3395                             do_fixup = False
3396
3397                     def ffmpeg_fixup(cndn, msg, cls):
3398                         if not (do_fixup and cndn):
3399                             return
3400                         elif do_fixup == 'warn':
3401                             self.report_warning(f'{vid}: {msg}')
3402                             return
3403                         pp = cls(self)
3404                         if pp.available:
3405                             info_dict['__postprocessors'].append(pp)
3406                         else:
3407                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3408
3409                     stretched_ratio = info_dict.get('stretched_ratio')
3410                     ffmpeg_fixup(stretched_ratio not in (1, None),
3411                                  f'Non-uniform pixel ratio {stretched_ratio}',
3412                                  FFmpegFixupStretchedPP)
3413
3414                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3415                     downloader = downloader.FD_NAME if downloader else None
3416
3417                     ext = info_dict.get('ext')
3418                     postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3419                         isinstance(pp, FFmpegVideoConvertorPP)
3420                         and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3421                     ) for pp in self._pps['post_process'])
3422
3423                     if not postprocessed_by_ffmpeg:
3424                         ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
3425                                      'writing DASH m4a. Only some players support this container',
3426                                      FFmpegFixupM4aPP)
3427                         ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3428                                      or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3429                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3430                                      FFmpegFixupM3u8PP)
3431                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
3432                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3433
3434                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3435                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3436
3437                 fixup()
3438                 try:
3439                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3440                 except PostProcessingError as err:
3441                     self.report_error('Postprocessing: %s' % str(err))
3442                     return
3443                 try:
3444                     for ph in self._post_hooks:
3445                         ph(info_dict['filepath'])
3446                 except Exception as err:
3447                     self.report_error('post hooks: %s' % str(err))
3448                     return
3449                 info_dict['__write_download_archive'] = True
3450
3451         assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
3452         if self.params.get('force_write_download_archive'):
3453             info_dict['__write_download_archive'] = True
3454         check_max_downloads()
3455
3456     def __download_wrapper(self, func):
3457         @functools.wraps(func)
3458         def wrapper(*args, **kwargs):
3459             try:
3460                 res = func(*args, **kwargs)
3461             except UnavailableVideoError as e:
3462                 self.report_error(e)
3463             except DownloadCancelled as e:
3464                 self.to_screen(f'[info] {e}')
3465                 if not self.params.get('break_per_url'):
3466                     raise
3467                 self._num_downloads = 0
3468             else:
3469                 if self.params.get('dump_single_json', False):
3470                     self.post_extract(res)
3471                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3472         return wrapper
3473
3474     def download(self, url_list):
3475         """Download a given list of URLs."""
3476         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3477         outtmpl = self.params['outtmpl']['default']
3478         if (len(url_list) > 1
3479                 and outtmpl != '-'
3480                 and '%' not in outtmpl
3481                 and self.params.get('max_downloads') != 1):
3482             raise SameFileError(outtmpl)
3483
3484         for url in url_list:
3485             self.__download_wrapper(self.extract_info)(
3486                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3487
3488         return self._download_retcode
3489
3490     def download_with_info_file(self, info_filename):
3491         with contextlib.closing(fileinput.FileInput(
3492                 [info_filename], mode='r',
3493                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3494             # FileInput doesn't have a read method, we can't call json.load
3495             infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
3496                      for info in variadic(json.loads('\n'.join(f)))]
3497         for info in infos:
3498             self._load_cookies(info.get('cookies'), from_headers=False)
3499             self._load_cookies(traverse_obj(info.get('http_headers'), 'Cookie', casesense=False))  # compat
3500             try:
3501                 self.__download_wrapper(self.process_ie_result)(info, download=True)
3502             except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3503                 if not isinstance(e, EntryNotInPlaylist):
3504                     self.to_stderr('\r')
3505                 webpage_url = info.get('webpage_url')
3506                 if webpage_url is None:
3507                     raise
3508                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3509                 self.download([webpage_url])
3510         return self._download_retcode
3511
3512     @staticmethod
3513     def sanitize_info(info_dict, remove_private_keys=False):
3514         ''' Sanitize the infodict for converting to json '''
3515         if info_dict is None:
3516             return info_dict
3517         info_dict.setdefault('epoch', int(time.time()))
3518         info_dict.setdefault('_type', 'video')
3519         info_dict.setdefault('_version', {
3520             'version': __version__,
3521             'current_git_head': current_git_head(),
3522             'release_git_head': RELEASE_GIT_HEAD,
3523             'repository': REPOSITORY,
3524         })
3525
3526         if remove_private_keys:
3527             reject = lambda k, v: v is None or k.startswith('__') or k in {
3528                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3529                 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3530                 'playlist_autonumber', '_format_sort_fields',
3531             }
3532         else:
3533             reject = lambda k, v: False
3534
3535         def filter_fn(obj):
3536             if isinstance(obj, dict):
3537                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3538             elif isinstance(obj, (list, tuple, set, LazyList)):
3539                 return list(map(filter_fn, obj))
3540             elif obj is None or isinstance(obj, (str, int, float, bool)):
3541                 return obj
3542             else:
3543                 return repr(obj)
3544
3545         return filter_fn(info_dict)
3546
3547     @staticmethod
3548     def filter_requested_info(info_dict, actually_filter=True):
3549         ''' Alias of sanitize_info for backward compatibility '''
3550         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3551
3552     def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3553         for filename in set(filter(None, files_to_delete)):
3554             if msg:
3555                 self.to_screen(msg % filename)
3556             try:
3557                 os.remove(filename)
3558             except OSError:
3559                 self.report_warning(f'Unable to delete file {filename}')
3560             if filename in info.get('__files_to_move', []):  # NB: Delete even if None
3561                 del info['__files_to_move'][filename]
3562
3563     @staticmethod
3564     def post_extract(info_dict):
3565         def actual_post_extract(info_dict):
3566             if info_dict.get('_type') in ('playlist', 'multi_video'):
3567                 for video_dict in info_dict.get('entries', {}):
3568                     actual_post_extract(video_dict or {})
3569                 return
3570
3571             post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3572             info_dict.update(post_extractor())
3573
3574         actual_post_extract(info_dict or {})
3575
3576     def run_pp(self, pp, infodict):
3577         files_to_delete = []
3578         if '__files_to_move' not in infodict:
3579             infodict['__files_to_move'] = {}
3580         try:
3581             files_to_delete, infodict = pp.run(infodict)
3582         except PostProcessingError as e:
3583             # Must be True and not 'only_download'
3584             if self.params.get('ignoreerrors') is True:
3585                 self.report_error(e)
3586                 return infodict
3587             raise
3588
3589         if not files_to_delete:
3590             return infodict
3591         if self.params.get('keepvideo', False):
3592             for f in files_to_delete:
3593                 infodict['__files_to_move'].setdefault(f, '')
3594         else:
3595             self._delete_downloaded_files(
3596                 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3597         return infodict
3598
3599     def run_all_pps(self, key, info, *, additional_pps=None):
3600         if key != 'video':
3601             self._forceprint(key, info)
3602         for pp in (additional_pps or []) + self._pps[key]:
3603             info = self.run_pp(pp, info)
3604         return info
3605
3606     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3607         info = dict(ie_info)
3608         info['__files_to_move'] = files_to_move or {}
3609         try:
3610             info = self.run_all_pps(key, info)
3611         except PostProcessingError as err:
3612             msg = f'Preprocessing: {err}'
3613             info.setdefault('__pending_error', msg)
3614             self.report_error(msg, is_error=False)
3615         return info, info.pop('__files_to_move', None)
3616
3617     def post_process(self, filename, info, files_to_move=None):
3618         """Run all the postprocessors on the given file."""
3619         info['filepath'] = filename
3620         info['__files_to_move'] = files_to_move or {}
3621         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3622         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3623         del info['__files_to_move']
3624         return self.run_all_pps('after_move', info)
3625
3626     def _make_archive_id(self, info_dict):
3627         video_id = info_dict.get('id')
3628         if not video_id:
3629             return
3630         # Future-proof against any change in case
3631         # and backwards compatibility with prior versions
3632         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3633         if extractor is None:
3634             url = str_or_none(info_dict.get('url'))
3635             if not url:
3636                 return
3637             # Try to find matching extractor for the URL and take its ie_key
3638             for ie_key, ie in self._ies.items():
3639                 if ie.suitable(url):
3640                     extractor = ie_key
3641                     break
3642             else:
3643                 return
3644         return make_archive_id(extractor, video_id)
3645
3646     def in_download_archive(self, info_dict):
3647         if not self.archive:
3648             return False
3649
3650         vid_ids = [self._make_archive_id(info_dict)]
3651         vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3652         return any(id_ in self.archive for id_ in vid_ids)
3653
3654     def record_download_archive(self, info_dict):
3655         fn = self.params.get('download_archive')
3656         if fn is None:
3657             return
3658         vid_id = self._make_archive_id(info_dict)
3659         assert vid_id
3660
3661         self.write_debug(f'Adding to archive: {vid_id}')
3662         if is_path_like(fn):
3663             with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3664                 archive_file.write(vid_id + '\n')
3665         self.archive.add(vid_id)
3666
3667     @staticmethod
3668     def format_resolution(format, default='unknown'):
3669         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3670             return 'audio only'
3671         if format.get('resolution') is not None:
3672             return format['resolution']
3673         if format.get('width') and format.get('height'):
3674             return '%dx%d' % (format['width'], format['height'])
3675         elif format.get('height'):
3676             return '%sp' % format['height']
3677         elif format.get('width'):
3678             return '%dx?' % format['width']
3679         return default
3680
3681     def _list_format_headers(self, *headers):
3682         if self.params.get('listformats_table', True) is not False:
3683             return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3684         return headers
3685
3686     def _format_note(self, fdict):
3687         res = ''
3688         if fdict.get('ext') in ['f4f', 'f4m']:
3689             res += '(unsupported)'
3690         if fdict.get('language'):
3691             if res:
3692                 res += ' '
3693             res += '[%s]' % fdict['language']
3694         if fdict.get('format_note') is not None:
3695             if res:
3696                 res += ' '
3697             res += fdict['format_note']
3698         if fdict.get('tbr') is not None:
3699             if res:
3700                 res += ', '
3701             res += '%4dk' % fdict['tbr']
3702         if fdict.get('container') is not None:
3703             if res:
3704                 res += ', '
3705             res += '%s container' % fdict['container']
3706         if (fdict.get('vcodec') is not None
3707                 and fdict.get('vcodec') != 'none'):
3708             if res:
3709                 res += ', '
3710             res += fdict['vcodec']
3711             if fdict.get('vbr') is not None:
3712                 res += '@'
3713         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3714             res += 'video@'
3715         if fdict.get('vbr') is not None:
3716             res += '%4dk' % fdict['vbr']
3717         if fdict.get('fps') is not None:
3718             if res:
3719                 res += ', '
3720             res += '%sfps' % fdict['fps']
3721         if fdict.get('acodec') is not None:
3722             if res:
3723                 res += ', '
3724             if fdict['acodec'] == 'none':
3725                 res += 'video only'
3726             else:
3727                 res += '%-5s' % fdict['acodec']
3728         elif fdict.get('abr') is not None:
3729             if res:
3730                 res += ', '
3731             res += 'audio'
3732         if fdict.get('abr') is not None:
3733             res += '@%3dk' % fdict['abr']
3734         if fdict.get('asr') is not None:
3735             res += ' (%5dHz)' % fdict['asr']
3736         if fdict.get('filesize') is not None:
3737             if res:
3738                 res += ', '
3739             res += format_bytes(fdict['filesize'])
3740         elif fdict.get('filesize_approx') is not None:
3741             if res:
3742                 res += ', '
3743             res += '~' + format_bytes(fdict['filesize_approx'])
3744         return res
3745
3746     def _get_formats(self, info_dict):
3747         if info_dict.get('formats') is None:
3748             if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3749                 return [info_dict]
3750             return []
3751         return info_dict['formats']
3752
3753     def render_formats_table(self, info_dict):
3754         formats = self._get_formats(info_dict)
3755         if not formats:
3756             return
3757         if not self.params.get('listformats_table', True) is not False:
3758             table = [
3759                 [
3760                     format_field(f, 'format_id'),
3761                     format_field(f, 'ext'),
3762                     self.format_resolution(f),
3763                     self._format_note(f)
3764                 ] for f in formats if (f.get('preference') or 0) >= -1000]
3765             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3766
3767         def simplified_codec(f, field):
3768             assert field in ('acodec', 'vcodec')
3769             codec = f.get(field)
3770             if not codec:
3771                 return 'unknown'
3772             elif codec != 'none':
3773                 return '.'.join(codec.split('.')[:4])
3774
3775             if field == 'vcodec' and f.get('acodec') == 'none':
3776                 return 'images'
3777             elif field == 'acodec' and f.get('vcodec') == 'none':
3778                 return ''
3779             return self._format_out('audio only' if field == 'vcodec' else 'video only',
3780                                     self.Styles.SUPPRESS)
3781
3782         delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3783         table = [
3784             [
3785                 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3786                 format_field(f, 'ext'),
3787                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3788                 format_field(f, 'fps', '\t%d', func=round),
3789                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3790                 format_field(f, 'audio_channels', '\t%s'),
3791                 delim, (
3792                     format_field(f, 'filesize', ' \t%s', func=format_bytes)
3793                     or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
3794                     or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
3795                                     None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
3796                 format_field(f, 'tbr', '\t%dk', func=round),
3797                 shorten_protocol_name(f.get('protocol', '')),
3798                 delim,
3799                 simplified_codec(f, 'vcodec'),
3800                 format_field(f, 'vbr', '\t%dk', func=round),
3801                 simplified_codec(f, 'acodec'),
3802                 format_field(f, 'abr', '\t%dk', func=round),
3803                 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3804                 join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
3805                     self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
3806                     (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
3807                      else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
3808                     format_field(f, 'format_note'),
3809                     format_field(f, 'container', ignore=(None, f.get('ext'))),
3810                     delim=', '), delim=' '),
3811             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3812         header_line = self._list_format_headers(
3813             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3814             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3815
3816         return render_table(
3817             header_line, table, hide_empty=True,
3818             delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3819
3820     def render_thumbnails_table(self, info_dict):
3821         thumbnails = list(info_dict.get('thumbnails') or [])
3822         if not thumbnails:
3823             return None
3824         return render_table(
3825             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3826             [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3827
3828     def render_subtitles_table(self, video_id, subtitles):
3829         def _row(lang, formats):
3830             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3831             if len(set(names)) == 1:
3832                 names = [] if names[0] == 'unknown' else names[:1]
3833             return [lang, ', '.join(names), ', '.join(exts)]
3834
3835         if not subtitles:
3836             return None
3837         return render_table(
3838             self._list_format_headers('Language', 'Name', 'Formats'),
3839             [_row(lang, formats) for lang, formats in subtitles.items()],
3840             hide_empty=True)
3841
3842     def __list_table(self, video_id, name, func, *args):
3843         table = func(*args)
3844         if not table:
3845             self.to_screen(f'{video_id} has no {name}')
3846             return
3847         self.to_screen(f'[info] Available {name} for {video_id}:')
3848         self.to_stdout(table)
3849
3850     def list_formats(self, info_dict):
3851         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3852
3853     def list_thumbnails(self, info_dict):
3854         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3855
3856     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3857         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3858
3859     def urlopen(self, req):
3860         """ Start an HTTP download """
3861         if isinstance(req, str):
3862             req = sanitized_Request(req)
3863         return self._opener.open(req, timeout=self._socket_timeout)
3864
    def print_debug_header(self):
        """Write the verbose-mode debug header; does nothing unless the 'verbose' param is set.

        The header reports encodings, program version and source, lazy-extractor
        status, compat options, git HEAD, system identifier, executable versions,
        optional libraries, the proxy map and loaded plugins. Lines go through the
        'logger' param when one is given, otherwise they are written directly.
        """
        if not self.params.get('verbose'):
            return

        from . import _IN_CLI  # Must be delayed import

        # These imports can be slow. So import them only as needed
        from .extractor.extractors import _LAZY_LOADER
        from .extractor.extractors import (
            _PLUGIN_CLASSES as plugin_ies,
            _PLUGIN_OVERRIDES as plugin_ie_overrides
        )

        def get_encoding(stream):
            # Describe a stream's encoding, with notes on missing terminal capabilities
            ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
            additional_info = []
            if os.environ.get('TERM', '').lower() == 'dumb':
                additional_info.append('dumb')
            if not supports_terminal_sequences(stream):
                from .utils import WINDOWS_VT_MODE  # Must be imported locally
                additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
            if additional_info:
                ret = f'{ret} ({",".join(additional_info)})'
            return ret

        # Every configured output stream is listed except the 'console' one
        encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            self.get_encoding(),
            ', '.join(
                f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
                if stream is not None and key != 'console')
        )

        # Choose where the debug lines go: the user-supplied logger, or our own writer
        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        if VARIANT not in (None, 'pip'):
            source += '*'
        klass = type(self)
        # Version line; API users (not running from the CLI) are marked as such,
        # including the subclass name when self is not a plain YoutubeDL
        write_debug(join_nonempty(
            f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
            f'{CHANNEL}@{__version__}',
            f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
            delim=' '))

        if not _IN_CLI:
            write_debug(f'params: {self.params}')

        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if self.params['compat_opts']:
            write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

        if current_git_head():
            write_debug(f'Git HEAD: {current_git_head()}')
        write_debug(system_identifier())

        # Versions of external executables; enabled ffmpeg features are appended to its version
        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .compat.compat_utils import get_package_info
        from .dependencies import available_dependencies

        write_debug('Optional libraries: %s' % (', '.join(sorted({
            join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
        })) or 'none'))

        # The opener must exist before its handlers can be inspected for proxies
        self._setup_opener()
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        # One line per plugin type that has at least one plugin to show
        for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
            display_list = ['%s%s' % (
                klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in plugins.items()]
            if plugin_type == 'Extractor':
                display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                                    for parent, plugins in plugin_ie_overrides.items())
            if not display_list:
                continue
            write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

        plugin_dirs = plugin_directories()
        if plugin_dirs:
            write_debug(f'Plugin directories: {plugin_dirs}')

        # Not implemented
        # (the leading `False and` makes this branch unreachable)
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode()
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3986
3987     def _setup_opener(self):
3988         if hasattr(self, '_opener'):
3989             return
3990         timeout_val = self.params.get('socket_timeout')
3991         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3992
3993         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3994         opts_cookiefile = self.params.get('cookiefile')
3995         opts_proxy = self.params.get('proxy')
3996
3997         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3998
3999         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
4000         if opts_proxy is not None:
4001             if opts_proxy == '':
4002                 proxies = {}
4003             else:
4004                 proxies = {'http': opts_proxy, 'https': opts_proxy}
4005         else:
4006             proxies = urllib.request.getproxies()
4007             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
4008             if 'http' in proxies and 'https' not in proxies:
4009                 proxies['https'] = proxies['http']
4010         proxy_handler = PerRequestProxyHandler(proxies)
4011
4012         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
4013         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
4014         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
4015         redirect_handler = YoutubeDLRedirectHandler()
4016         data_handler = urllib.request.DataHandler()
4017
4018         # When passing our own FileHandler instance, build_opener won't add the
4019         # default FileHandler and allows us to disable the file protocol, which
4020         # can be used for malicious purposes (see
4021         # https://github.com/ytdl-org/youtube-dl/issues/8227)
4022         file_handler = urllib.request.FileHandler()
4023
4024         if not self.params.get('enable_file_urls'):
4025             def file_open(*args, **kwargs):
4026                 raise urllib.error.URLError(
4027                     'file:// URLs are explicitly disabled in yt-dlp for security reasons. '
4028                     'Use --enable-file-urls to enable at your own risk.')
4029             file_handler.file_open = file_open
4030
4031         opener = urllib.request.build_opener(
4032             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
4033
4034         # Delete the default user-agent header, which would otherwise apply in
4035         # cases where our custom HTTP handler doesn't come into play
4036         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
4037         opener.addheaders = []
4038         self._opener = opener
4039
4040     def encode(self, s):
4041         if isinstance(s, bytes):
4042             return s  # Already encoded
4043
4044         try:
4045             return s.encode(self.get_encoding())
4046         except UnicodeEncodeError as err:
4047             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
4048             raise
4049
4050     def get_encoding(self):
4051         encoding = self.params.get('encoding')
4052         if encoding is None:
4053             encoding = preferredencoding()
4054         return encoding
4055
4056     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
4057         ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
4058         if overwrite is None:
4059             overwrite = self.params.get('overwrites', True)
4060         if not self.params.get('writeinfojson'):
4061             return False
4062         elif not infofn:
4063             self.write_debug(f'Skipping writing {label} infojson')
4064             return False
4065         elif not self._ensure_dir_exists(infofn):
4066             return None
4067         elif not overwrite and os.path.exists(infofn):
4068             self.to_screen(f'[info] {label.title()} metadata is already present')
4069             return 'exists'
4070
4071         self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
4072         try:
4073             write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
4074             return True
4075         except OSError:
4076             self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
4077             return None
4078
4079     def _write_description(self, label, ie_result, descfn):
4080         ''' Write description and returns True = written, False = skip, None = error '''
4081         if not self.params.get('writedescription'):
4082             return False
4083         elif not descfn:
4084             self.write_debug(f'Skipping writing {label} description')
4085             return False
4086         elif not self._ensure_dir_exists(descfn):
4087             return None
4088         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
4089             self.to_screen(f'[info] {label.title()} description is already present')
4090         elif ie_result.get('description') is None:
4091             self.to_screen(f'[info] There\'s no {label} description to write')
4092             return False
4093         else:
4094             try:
4095                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
4096                 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
4097                     descfile.write(ie_result['description'])
4098             except OSError:
4099                 self.report_error(f'Cannot write {label} description file {descfn}')
4100                 return None
4101         return True
4102
    def _write_subtitles(self, info_dict, filename):
        ''' Write the requested subtitles of info_dict next to filename.

        Returns a list of (sub_filename, final_sub_filename) tuples, or None if
        writing inline subtitle data failed. The list is empty when subtitle
        writing is not enabled, nothing was requested, or no subtitle output
        template is available. Each handled subtitle also gets its 'filepath' set.
        Raises DownloadError when a download fails and 'ignoreerrors' is not True. '''
        ret = []
        subtitles = info_dict.get('requested_subtitles')
        if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            return ret
        elif not subtitles:
            self.to_screen('[info] There are no subtitles for the requested languages')
            return ret
        sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
        if not sub_filename_base:
            self.to_screen('[info] Skipping writing video subtitles')
            return ret

        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            # sub_filename is where the file is written now; sub_filename_final is
            # derived from the 'subtitle' output template
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
            existing_sub = self.existing_file((sub_filename_final, sub_filename))
            if existing_sub:
                self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
                sub_info['filepath'] = existing_sub
                ret.append((existing_sub, sub_filename_final))
                continue

            self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
            # Subtitles that carry their content inline are written directly
            if sub_info.get('data') is not None:
                try:
                    # Use newline='' to prevent conversion of newline characters
                    # See https://github.com/ytdl-org/youtube-dl/issues/10268
                    with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                        subfile.write(sub_info['data'])
                    sub_info['filepath'] = sub_filename
                    ret.append((sub_filename, sub_filename_final))
                    continue
                except OSError:
                    self.report_error(f'Cannot write video subtitles file {sub_filename}')
                    return None

            # Otherwise the subtitle has to be downloaded
            try:
                sub_copy = sub_info.copy()
                # Use the video's HTTP headers unless the subtitle defines its own
                sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
                self.dl(sub_filename, sub_copy, subtitle=True)
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
                if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                    # The error is reported only when 'ignoreerrors' is falsy,
                    # but DownloadError is raised in both cases
                    if not self.params.get('ignoreerrors'):
                        self.report_error(msg)
                    raise DownloadError(msg)
                self.report_warning(msg)
        return ret
4158
    def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
        ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename)

        Nothing is written unless 'writethumbnail' or 'write_all_thumbnails' is set.
        Thumbnails are tried from the last list entry to the first; unless all are
        requested, the loop stops after the first one that is present or downloaded.
        Thumbnails that fail to download are removed from the list in place, and
        each handled thumbnail gets its 'filepath' set. '''
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails, ret = [], []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
            if not thumbnails:
                self.to_screen(f'[info] There are no {label} thumbnails to download')
                return ret
        # The thumbnail id is put into the file name only when several files are written
        multiple = write_all and len(thumbnails) > 1

        if thumb_filename_base is None:
            thumb_filename_base = filename
        if thumbnails and not thumb_filename_base:
            self.write_debug(f'Skipping writing {label} thumbnail')
            return ret

        # Iterating in reverse keeps the remaining indices valid when entries are popped
        for idx, t in list(enumerate(thumbnails))[::-1]:
            thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
            thumb_display_id = f'{label} thumbnail {t["id"]}'
            thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
            thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

            existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
            if existing_thumb:
                self.to_screen('[info] %s is already present' % (
                    thumb_display_id if multiple else f'{label} thumbnail').capitalize())
                t['filepath'] = existing_thumb
                ret.append((existing_thumb, thumb_filename_final))
            else:
                self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                try:
                    uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append((thumb_filename, thumb_filename_final))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    # A 404 is only an informational message; other errors are warnings
                    if isinstance(err, urllib.error.HTTPError) and err.code == 404:
                        self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
                    else:
                        self.report_warning(f'Unable to download {thumb_display_id}: {err}')
                    thumbnails.pop(idx)
            # One thumbnail is enough unless all of them were requested
            if ret and not write_all:
                break
        return ret