yt_dlp/YoutubeDL.py

   1 import collections
   2 import contextlib
   3 import datetime
   4 import errno
   5 import fileinput
   6 import functools
   7 import io
   8 import itertools
   9 import json
  10 import locale
  11 import operator
  12 import os
  13 import random
  14 import re
  15 import shutil
  16 import string
  17 import subprocess
  18 import sys
  19 import tempfile
  20 import time
  21 import tokenize
  22 import traceback
  23 import unicodedata
  24 import urllib.request
  25
  26 from .cache import Cache
  27 from .compat import compat_os_name, compat_shlex_quote
  28 from .cookies import load_cookies
  29 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
  30 from .downloader.rtmp import rtmpdump_version
  31 from .extractor import gen_extractor_classes, get_info_extractor
  32 from .extractor.common import UnsupportedURLIE
  33 from .extractor.openload import PhantomJSwrapper
  34 from .minicurses import format_text
  35 from .plugins import directories as plugin_directories
  36 from .postprocessor import _PLUGIN_CLASSES as plugin_pps
  37 from .postprocessor import (
  38     EmbedThumbnailPP,
  39     FFmpegFixupDuplicateMoovPP,
  40     FFmpegFixupDurationPP,
  41     FFmpegFixupM3u8PP,
  42     FFmpegFixupM4aPP,
  43     FFmpegFixupStretchedPP,
  44     FFmpegFixupTimestampPP,
  45     FFmpegMergerPP,
  46     FFmpegPostProcessor,
  47     FFmpegVideoConvertorPP,
  48     MoveFilesAfterDownloadPP,
  49     get_postprocessor,
  50 )
  51 from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
  52 from .update import REPOSITORY, current_git_head, detect_variant
  53 from .utils import (
  54     DEFAULT_OUTTMPL,
  55     IDENTITY,
  56     LINK_TEMPLATES,
  57     MEDIA_EXTENSIONS,
  58     NO_DEFAULT,
  59     NUMBER_RE,
  60     OUTTMPL_TYPES,
  61     POSTPROCESS_WHEN,
  62     STR_FORMAT_RE_TMPL,
  63     STR_FORMAT_TYPES,
  64     ContentTooShortError,
  65     DateRange,
  66     DownloadCancelled,
  67     DownloadError,
  68     EntryNotInPlaylist,
  69     ExistingVideoReached,
  70     ExtractorError,
  71     FormatSorter,
  72     GeoRestrictedError,
  73     HEADRequest,
  74     ISO3166Utils,
  75     LazyList,
  76     MaxDownloadsReached,
  77     Namespace,
  78     PagedList,
  79     PerRequestProxyHandler,
  80     PlaylistEntries,
  81     Popen,
  82     PostProcessingError,
  83     ReExtractInfo,
  84     RejectedVideoReached,
  85     SameFileError,
  86     UnavailableVideoError,
  87     UserNotLive,
  88     YoutubeDLCookieProcessor,
  89     YoutubeDLHandler,
  90     YoutubeDLRedirectHandler,
  91     age_restricted,
  92     args_to_str,
  93     bug_reports_message,
  94     date_from_str,
  95     deprecation_warning,
  96     determine_ext,
  97     determine_protocol,
  98     encode_compat_str,
  99     encodeFilename,
 100     error_to_compat_str,
 101     escapeHTML,
 102     expand_path,
 103     filter_dict,
 104     float_or_none,
 105     format_bytes,
 106     format_decimal_suffix,
 107     format_field,
 108     formatSeconds,
 109     get_compatible_ext,
 110     get_domain,
 111     int_or_none,
 112     iri_to_uri,
 113     is_path_like,
 114     join_nonempty,
 115     locked_file,
 116     make_archive_id,
 117     make_dir,
 118     make_HTTPS_handler,
 119     merge_headers,
 120     network_exceptions,
 121     number_of_digits,
 122     orderedSet,
 123     orderedSet_from_options,
 124     parse_filesize,
 125     preferredencoding,
 126     prepend_extension,
 127     register_socks_protocols,
 128     remove_terminal_sequences,
 129     render_table,
 130     replace_extension,
 131     sanitize_filename,
 132     sanitize_path,
 133     sanitize_url,
 134     sanitized_Request,
 135     std_headers,
 136     str_or_none,
 137     strftime_or_none,
 138     subtitles_filename,
 139     supports_terminal_sequences,
 140     system_identifier,
 141     timetuple_from_msec,
 142     to_high_limit_path,
 143     traverse_obj,
 144     try_call,
 145     try_get,
 146     url_basename,
 147     variadic,
 148     version_tuple,
 149     windows_enable_vt_mode,
 150     write_json_file,
 151     write_string,
 152 )
 153 from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__
 154
 155 if compat_os_name == 'nt':
 156     import ctypes
 157
 158
 159 class YoutubeDL:
 160     """YoutubeDL class.
 161
 162     YoutubeDL objects are the ones responsible for downloading the
 163     actual video file and writing it to disk if the user has requested
 164     it, among some other tasks. In most cases there should be one per
 165     program. As, given a video URL, the downloader doesn't know how to
 166     extract all the needed information, task that InfoExtractors do, it
 167     has to pass the URL to one of them.
 168
 169     For this, YoutubeDL objects have a method that allows
 170     InfoExtractors to be registered in a given order. When it is passed
 171     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 172     finds that reports being able to handle it. The InfoExtractor extracts
 173     all the information about the video or videos the URL refers to, and
 174     YoutubeDL processes the extracted information, possibly using a File
 175     Downloader to download the video.
 176
 177     YoutubeDL objects accept a lot of parameters. In order not to saturate
 178     the object constructor with arguments, it receives a dictionary of
 179     options instead. These options are available through the params
 180     attribute for the InfoExtractors to use. The YoutubeDL also
 181     registers itself as the downloader in charge for the InfoExtractors
 182     that are added to it, so this is a "mutual registration".
 183
 184     Available options:
 185
 186     username:          Username for authentication purposes.
 187     password:          Password for authentication purposes.
 188     videopassword:     Password for accessing a video.
 189     ap_mso:            Adobe Pass multiple-system operator identifier.
 190     ap_username:       Multiple-system operator account username.
 191     ap_password:       Multiple-system operator account password.
 192     usenetrc:          Use netrc for authentication instead.
 193     netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
 194     verbose:           Print additional info to stdout.
 195     quiet:             Do not print messages to stdout.
 196     no_warnings:       Do not print out anything for warnings.
 197     forceprint:        A dict with keys WHEN mapped to a list of templates to
 198                        print to stdout. The allowed keys are video or any of the
 199                        items in utils.POSTPROCESS_WHEN.
 200                        For compatibility, a single list is also accepted
 201     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 202                        a list of tuples with (template, filename)
 203     forcejson:         Force printing info_dict as JSON.
 204     dump_single_json:  Force printing the info_dict of the whole playlist
 205                        (or video) as a single JSON line.
 206     force_write_download_archive: Force writing download archive regardless
 207                        of 'skip_download' or 'simulate'.
 208     simulate:          Do not download the video files. If unset (or None),
 209                        simulate only if listsubtitles, listformats or list_thumbnails is used
 210     format:            Video format code. see "FORMAT SELECTION" for more details.
 211                        You can also pass a function. The function takes 'ctx' as
 212                        argument and returns the formats to download.
 213                        See "build_format_selector" for an implementation
 214     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 215     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 216                        extracting metadata even if the video is not actually
 217                        available for download (experimental)
 218     format_sort:       A list of fields by which to sort the video formats.
 219                        See "Sorting Formats" for more details.
 220     format_sort_force: Force the given format_sort. see "Sorting Formats"
 221                        for more details.
 222     prefer_free_formats: Whether to prefer video formats with free containers
 223                        over non-free ones of same quality.
 224     allow_multiple_video_streams:   Allow multiple video streams to be merged
 225                        into a single file
 226     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 227                        into a single file
 228     check_formats:     Whether to test if the formats are downloadable.
 229                        Can be True (check all), False (check none),
 230                        'selected' (check selected formats),
 231                        or None (check only if requested by extractor)
 232     paths:             Dictionary of output paths. The allowed keys are 'home',
 233                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 234     outtmpl:           Dictionary of templates for output names. Allowed keys
 235                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 236                        For compatibility with youtube-dl, a single string can also be used
 237     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 238     restrictfilenames: Do not allow "&" and spaces in file names
 239     trim_file_name:    Limit length of filename (extension excluded)
 240     windowsfilenames:  Force the filenames to be windows compatible
 241     ignoreerrors:      Do not stop on download/postprocessing errors.
 242                        Can be 'only_download' to ignore only download errors.
 243                        Default is 'only_download' for CLI, but False for API
 244     skip_playlist_after_errors: Number of allowed failures until the rest of
 245                        the playlist is skipped
 246     allowed_extractors:  List of regexes to match against extractor names that are allowed
 247     overwrites:        Overwrite all video and metadata files if True,
 248                        overwrite only non-video files if None
 249                        and don't overwrite any file if False
 250                        For compatibility with youtube-dl,
 251                        "nooverwrites" may also be used instead
 252     playlist_items:    Specific indices of playlist to download.
 253     playlistrandom:    Download playlist items in random order.
 254     lazy_playlist:     Process playlist entries as they are received.
 255     matchtitle:        Download only matching titles.
 256     rejecttitle:       Reject downloads for matching titles.
 257     logger:            Log messages to a logging.Logger instance.
 258     logtostderr:       Print everything to stderr instead of stdout.
 259     consoletitle:      Display progress in console window's titlebar.
 260     writedescription:  Write the video description to a .description file
 261     writeinfojson:     Write the video metadata to a .info.json file
 262     clean_infojson:    Remove private fields from the infojson
 263     getcomments:       Extract video comments. This will not be written to disk
 264                        unless writeinfojson is also given
 265     writeannotations:  Write the video annotations to a .annotations.xml file
 266     writethumbnail:    Write the thumbnail image to a file
 267     allow_playlist_files: Whether to write playlists' description, infojson etc
 268                        also to disk when using the 'write*' options
 269     write_all_thumbnails:  Write all thumbnail formats to files
 270     writelink:         Write an internet shortcut file, depending on the
 271                        current platform (.url/.webloc/.desktop)
 272     writeurllink:      Write a Windows internet shortcut file (.url)
 273     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 274     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 275     writesubtitles:    Write the video subtitles to a file
 276     writeautomaticsub: Write the automatically generated subtitles to a file
 277     listsubtitles:     Lists all available subtitles for the video
 278     subtitlesformat:   The format code for subtitles
 279     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 280                        The list may contain "all" to refer to all the available
 281                        subtitles. The language can be prefixed with a "-" to
 282                        exclude it from the requested languages, e.g. ['all', '-live_chat']
 283     keepvideo:         Keep the video file after post-processing
 284     daterange:         A DateRange object, download only if the upload_date is in the range.
 285     skip_download:     Skip the actual download of the video file
 286     cachedir:          Location of the cache files in the filesystem.
 287                        False to disable filesystem cache.
 288     noplaylist:        Download single video instead of a playlist if in doubt.
 289     age_limit:         An integer representing the user's age in years.
 290                        Unsuitable videos for the given age are skipped.
 291     min_views:         An integer representing the minimum view count the video
 292                        must have in order to not be skipped.
 293                        Videos without view count information are always
 294                        downloaded. None for no limit.
 295     max_views:         An integer representing the maximum view count.
 296                        Videos that are more popular than that are not
 297                        downloaded.
 298                        Videos without view count information are always
 299                        downloaded. None for no limit.
 300     download_archive:  A set, or the name of a file where all downloads are recorded.
 301                        Videos already present in the file are not downloaded again.
 302     break_on_existing: Stop the download process after attempting to download a
 303                        file that is in the archive.
 304     break_per_url:     Whether break_on_reject and break_on_existing
 305                        should act on each input URL as opposed to for the entire queue
 306     cookiefile:        File name or text stream from where cookies should be read and dumped to
 307     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 308                        name/path from where cookies are loaded, the name of the keyring,
 309                        and the container name, e.g. ('chrome', ) or
 310                        ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
 311     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 312                        support RFC 5746 secure renegotiation
 313     nocheckcertificate:  Do not verify SSL certificates
 314     client_certificate:  Path to client certificate file in PEM format. May include the private key
 315     client_certificate_key:  Path to private key file for client certificate
 316     client_certificate_password:  Password for client certificate private key, if encrypted.
 317                         If not provided and the key is encrypted, yt-dlp will ask interactively
 318     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 319                        (Only supported by some extractors)
 320     enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
 321     http_headers:      A dictionary of custom headers to be used for all requests
 322     proxy:             URL of the proxy server to use
 323     geo_verification_proxy:  URL of the proxy to use for IP address verification
 324                        on geo-restricted sites.
 325     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 326     bidi_workaround:   Work around buggy terminals without bidirectional text
 327                        support, using fribidi
 328     debug_printtraffic:Print out sent and received HTTP traffic
 329     default_search:    Prepend this string if an input url is not valid.
 330                        'auto' for elaborate guessing
 331     encoding:          Use this encoding instead of the system-specified.
 332     extract_flat:      Whether to resolve and process url_results further
 333                        * False:     Always process (default)
 334                        * True:      Never process
 335                        * 'in_playlist': Do not process inside playlist/multi_video
 336                        * 'discard': Always process, but don't return the result
 337                                     from inside playlist/multi_video
 338                        * 'discard_in_playlist': Same as "discard", but only for
 339                                     playlists (not multi_video)
 340     wait_for_video:    If given, wait for scheduled streams to become available.
 341                        The value should be a tuple containing the range
 342                        (min_secs, max_secs) to wait between retries
 343     postprocessors:    A list of dictionaries, each with an entry
 344                        * key:  The name of the postprocessor. See
 345                                yt_dlp/postprocessor/__init__.py for a list.
 346                        * when: When to run the postprocessor. Allowed values are
 347                                the entries of utils.POSTPROCESS_WHEN
 348                                Assumed to be 'post_process' if not given
 349     progress_hooks:    A list of functions that get called on download
 350                        progress, with a dictionary with the entries
 351                        * status: One of "downloading", "error", or "finished".
 352                                  Check this first and ignore unknown values.
 353                        * info_dict: The extracted info_dict
 354
 355                        If status is one of "downloading", or "finished", the
 356                        following properties may also be present:
 357                        * filename: The final filename (always present)
 358                        * tmpfilename: The filename we're currently writing to
 359                        * downloaded_bytes: Bytes on disk
 360                        * total_bytes: Size of the whole file, None if unknown
 361                        * total_bytes_estimate: Guess of the eventual file size,
 362                                                None if unavailable.
 363                        * elapsed: The number of seconds since download started.
 364                        * eta: The estimated time in seconds, None if unknown
 365                        * speed: The download speed in bytes/second, None if
 366                                 unknown
 367                        * fragment_index: The counter of the currently
 368                                          downloaded video fragment.
 369                        * fragment_count: The number of fragments (= individual
 370                                          files that will be merged)
 371
 372                        Progress hooks are guaranteed to be called at least once
 373                        (with status "finished") if the download is successful.
 374     postprocessor_hooks:  A list of functions that get called on postprocessing
 375                        progress, with a dictionary with the entries
 376                        * status: One of "started", "processing", or "finished".
 377                                  Check this first and ignore unknown values.
 378                        * postprocessor: Name of the postprocessor
 379                        * info_dict: The extracted info_dict
 380
 381                        Postprocessor hooks are guaranteed to be called at least twice
 382                        (with status "started" and "finished") if the processing is successful.
 383     merge_output_format: "/" separated list of extensions to use when merging formats.
 384     final_ext:         Expected final extension; used to detect when the file was
 385                        already downloaded and converted
 386     fixup:             Automatically correct known faults of the file.
 387                        One of:
 388                        - "never": do nothing
 389                        - "warn": only emit a warning
 390                        - "detect_or_warn": check whether we can do anything
 391                                            about it, warn otherwise (default)
 392     source_address:    Client-side IP address to bind to.
 393     sleep_interval_requests: Number of seconds to sleep between requests
 394                        during extraction
 395     sleep_interval:    Number of seconds to sleep before each download when
 396                        used alone or a lower bound of a range for randomized
 397                        sleep before each download (minimum possible number
 398                        of seconds to sleep) when used along with
 399                        max_sleep_interval.
 400     max_sleep_interval:Upper bound of a range for randomized sleep before each
 401                        download (maximum possible number of seconds to sleep).
 402                        Must only be used along with sleep_interval.
 403                        Actual sleep time will be a random float from range
 404                        [sleep_interval; max_sleep_interval].
 405     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 406     listformats:       Print an overview of available video formats and exit.
 407     list_thumbnails:   Print a table of all thumbnails and exit.
 408     match_filter:      A function that gets called for every video with the signature
 409                        (info_dict, *, incomplete: bool) -> Optional[str]
 410                        For backward compatibility with youtube-dl, the signature
 411                        (info_dict) -> Optional[str] is also allowed.
 412                        - If it returns a message, the video is ignored.
 413                        - If it returns None, the video is downloaded.
 414                        - If it returns utils.NO_DEFAULT, the user is interactively
 415                          asked whether to download the video.
 416                        - Raise utils.DownloadCancelled(msg) to abort remaining
 417                          downloads when a video is rejected.
 418                        match_filter_func in utils.py is one example for this.
 419     no_color:          Do not emit color codes in output.
 420     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 421                        HTTP header
 422     geo_bypass_country:
 423                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 424                        explicit geographic restriction bypassing via faking
 425                        X-Forwarded-For HTTP header
 426     geo_bypass_ip_block:
 427                        IP range in CIDR notation that will be used similarly to
 428                        geo_bypass_country
 429     external_downloader: A dictionary of protocol keys and the executable of the
 430                        external downloader to use for it. The allowed protocols
 431                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 432                        Set the value to 'native' to use the native downloader
 433     compat_opts:       Compatibility options. See "Differences in default behavior".
 434                        The following options do not work when used through the API:
 435                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 436                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 437                        Refer to __init__.py for their implementation
 438     progress_template: Dictionary of templates for progress outputs.
 439                        Allowed keys are 'download', 'postprocess',
 440                        'download-title' (console title) and 'postprocess-title'.
 441                        The template is mapped on a dictionary with keys 'progress' and 'info'
 442     retry_sleep_functions: Dictionary of functions that take the number of attempts
 443                        as argument and returns the time to sleep in seconds.
 444                        Allowed keys are 'http', 'fragment', 'file_access'
 445     download_ranges:   A callback function that gets called for every video with
 446                        the signature (info_dict, ydl) -> Iterable[Section].
 447                        Only the returned sections will be downloaded.
 448                        Each Section is a dict with the following keys:
 449                        * start_time: Start time of the section in seconds
 450                        * end_time: End time of the section in seconds
 451                        * title: Section title (Optional)
 452                        * index: Section number (Optional)
 453     force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
 454     noprogress:        Do not print the progress bar
 455     live_from_start:   Whether to download livestream videos from the start
 456
 457     The following parameters are not used by YoutubeDL itself, they are used by
 458     the downloader (see yt_dlp/downloader/common.py):
 459     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 460     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 461     continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 462     external_downloader_args, concurrent_fragment_downloads.
 463
 464     The following options are used by the post processors:
 465     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 466                        to the binary or its containing directory.
 467     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 468                        and a list of additional command-line arguments for the
 469                        postprocessor/executable. The dict can also have "PP+EXE" keys
 470                        which are used when the given exe is used by the given PP.
 471                        Use 'default' as the name for arguments to be passed to all PP
 472                        For compatibility with youtube-dl, a single list of args
 473                        can also be used
 474
 475     The following options are used by the extractors:
 476     extractor_retries: Number of times to retry for known errors
 477     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 478     hls_split_discontinuity: Split HLS playlists to different formats at
 479                        discontinuities such as ad breaks (default: False)
 480     extractor_args:    A dictionary of arguments to be passed to the extractors.
 481                        See "EXTRACTOR ARGUMENTS" for details.
 482                        E.g. {'youtube': {'skip': ['dash', 'hls']}}
 483     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 484
 485     The following options are deprecated and may be removed in the future:
 486
 487     break_on_reject:   Stop the download process when encountering a video that
 488                        has been filtered out.
 489                        - `raise DownloadCancelled(msg)` in match_filter instead
 490     force_generic_extractor: Force downloader to use the generic extractor
 491                        - Use allowed_extractors = ['generic', 'default']
 492     playliststart:     - Use playlist_items
 493                        Playlist item to start at.
 494     playlistend:       - Use playlist_items
 495                        Playlist item to end at.
 496     playlistreverse:   - Use playlist_items
 497                        Download playlist items in reverse order.
 498     forceurl:          - Use forceprint
 499                        Force printing final URL.
 500     forcetitle:        - Use forceprint
 501                        Force printing title.
 502     forceid:           - Use forceprint
 503                        Force printing ID.
 504     forcethumbnail:    - Use forceprint
 505                        Force printing thumbnail URL.
 506     forcedescription:  - Use forceprint
 507                        Force printing description.
 508     forcefilename:     - Use forceprint
 509                        Force printing final filename.
 510     forceduration:     - Use forceprint
 511                        Force printing duration.
 512     allsubtitles:      - Use subtitleslangs = ['all']
 513                        Downloads all the subtitles of the video
 514                        (requires writesubtitles or writeautomaticsub)
 515     include_ads:       - Doesn't work
 516                        Download ads as well
 517     call_home:         - Not implemented
 518                        Boolean, true iff we are allowed to contact the
 519                        yt-dlp servers for debugging.
 520     post_hooks:        - Register a custom postprocessor
 521                        A list of functions that get called as the final step
 522                        for each video file, after all postprocessors have been
 523                        called. The filename will be passed as the only argument.
 524     hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
 525                        Use the native HLS downloader instead of ffmpeg/avconv
 526                        if True, otherwise use ffmpeg/avconv if False, otherwise
 527                        use downloader suggested by extractor if None.
 528     prefer_ffmpeg:     - avconv support is deprecated
 529                        If False, use avconv instead of ffmpeg if both are available,
 530                        otherwise prefer ffmpeg.
 531     youtube_include_dash_manifest: - Use extractor_args
 532                        If True (default), DASH manifests and related
 533                        data will be downloaded and processed by extractor.
 534                        You can reduce network I/O by disabling it if you don't
 535                        care about DASH. (only for youtube)
 536     youtube_include_hls_manifest: - Use extractor_args
 537                        If True (default), HLS manifests and related
 538                        data will be downloaded and processed by extractor.
 539                        You can reduce network I/O by disabling it if you don't
 540                        care about HLS. (only for youtube)
 541     """
 542
    # Names of info-dict fields that are treated as numeric.
    # NOTE(review): the consumers of this set are outside this part of the file
    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    # Names of the keys that describe a single format
    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    # File extensions grouped by media kind, taken from MEDIA_EXTENSIONS
    # ('3gp' is additionally counted as a video extension here)
    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }
 569
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param params       Dictionary of options; None is treated as {}.
                            The dictionary is stored as-is (not copied) and
                            several of its keys are normalized in place below
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        # One (initially empty) postprocessor chain per POSTPROCESS_WHEN stage
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        # Messages already written with only_once=True (see _write_string)
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        # With 'logtostderr', everything that would go to stdout goes to stderr instead
        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            # First of stderr/stdout that supports terminal sequences;
            # None on Windows ('nt') or when neither of them does
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )

        # Best effort: a failure here is only reported as a debug message
        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        # Per-stream flag (for every stream except 'console'): colors are used only
        # if 'no_color' is not set and the stream supports terminal sequences
        self._allow_colors = Namespace(**{
            type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
            for type_, stream in self._out_files.items_ if type_ != 'console'
        })

        # The code is left like this to be reused for future deprecations
        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = ('Support for Python version %d.%d has been deprecated. '
                   'See  https://github.com/yt-dlp/yt-dlp/issues/3764  for more details.'
                   '\n                    You will no longer receive updates on this version')
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            # msg contributes the first pair of %d placeholders, the suffix the second pair
            self.deprecated_feature(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                '         If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            # Start an external process (bidiv, falling back to fribidi) whose stdout is
            # the slave end of a pty; _bidi_workaround() writes messages to its stdin
            # and reads the result back from the master end (_output_channel)
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                # A missing executable only disables the workaround; anything else is fatal
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        # Normalize 'compat_opts' to a set so that membership tests below work
        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        def check_deprecated(param, option, suggestion):
            """Warn if `param` is set (not None); return whether it was set"""
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        # The deprecated proxy option is used only if the new one is not given
        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        # Replay the messages handed over through the params
        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            # An explicit None is the same as not giving the key at all
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        # Any of the listing options switches to 'list_only' simulation,
        # unless 'simulate' was given explicitly
        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        # Must come after 'restrictfilenames' is final, since it affects the default templates
        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        # (None and '-' are kept as-is and a callable is used directly)
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        # Set http_headers defaults according to std_headers
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

        # Register the hooks given through the params with their respective adders
        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        # Each definition names the postprocessor with 'key', may give the stage with 'when',
        # and everything else is passed as keyword arguments to the postprocessor
        for pp_def_raw in self.params.get('postprocessors', []):
            # Work on a copy so that the pops below don't alter the caller's dict
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        self._setup_opener()
        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified

            Returns an empty set for None, `fn` itself if it is not path-like,
            otherwise the set of stripped lines of the file (empty if the file does not exist)
            """
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                # NOTE(review): presumably an already loaded archive container - confirm with callers
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                # A missing archive file is not an error
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))
 763
 764     def warn_if_short_id(self, argv):
 765         # short YouTube ID starting with dash?
 766         idxs = [
 767             i for i, a in enumerate(argv)
 768             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 769         if idxs:
 770             correct_argv = (
 771                 ['yt-dlp']
 772                 + [a for i, a in enumerate(argv) if i not in idxs]
 773                 + ['--'] + [argv[i] for i in idxs]
 774             )
 775             self.report_warning(
 776                 'Long argument string detected. '
 777                 'Use -- to separate parameters and URLs, like this:\n%s' %
 778                 args_to_str(correct_argv))
 779
 780     def add_info_extractor(self, ie):
 781         """Add an InfoExtractor object to the end of the list."""
 782         ie_key = ie.ie_key()
 783         self._ies[ie_key] = ie
 784         if not isinstance(ie, type):
 785             self._ies_instances[ie_key] = ie
 786             ie.set_downloader(self)
 787
 788     def get_info_extractor(self, ie_key):
 789         """
 790         Get an instance of an IE with name ie_key, it will try to get one from
 791         the _ies list, if there's no instance it will create a new one and add
 792         it to the extractor list.
 793         """
 794         ie = self._ies_instances.get(ie_key)
 795         if ie is None:
 796             ie = get_info_extractor(ie_key)()
 797             self.add_info_extractor(ie)
 798         return ie
 799
 800     def add_default_info_extractors(self):
 801         """
 802         Add the InfoExtractors returned by gen_extractors to the end of the list
 803         """
 804         all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
 805         all_ies['end'] = UnsupportedURLIE()
 806         try:
 807             ie_names = orderedSet_from_options(
 808                 self.params.get('allowed_extractors', ['default']), {
 809                     'all': list(all_ies),
 810                     'default': [name for name, ie in all_ies.items() if ie._ENABLED],
 811                 }, use_regex=True)
 812         except re.error as e:
 813             raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
 814         for name in ie_names:
 815             self.add_info_extractor(all_ies[name])
 816         self.write_debug(f'Loaded {len(ie_names)} extractors')
 817
 818     def add_post_processor(self, pp, when='post_process'):
 819         """Add a PostProcessor object to the end of the chain."""
 820         assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
 821         self._pps[when].append(pp)
 822         pp.set_downloader(self)
 823
 824     def add_post_hook(self, ph):
 825         """Add the post hook"""
 826         self._post_hooks.append(ph)
 827
 828     def add_progress_hook(self, ph):
 829         """Add the download progress hook"""
 830         self._progress_hooks.append(ph)
 831
 832     def add_postprocessor_hook(self, ph):
 833         """Add the postprocessing progress hook"""
 834         self._postprocessor_hooks.append(ph)
 835         for pps in self._pps.values():
 836             for pp in pps:
 837                 pp.add_progress_hook(ph)
 838
 839     def _bidi_workaround(self, message):
 840         if not hasattr(self, '_output_channel'):
 841             return message
 842
 843         assert hasattr(self, '_output_process')
 844         assert isinstance(message, str)
 845         line_count = message.count('\n') + 1
 846         self._output_process.stdin.write((message + '\n').encode())
 847         self._output_process.stdin.flush()
 848         res = ''.join(self._output_channel.readline().decode()
 849                       for _ in range(line_count))
 850         return res[:-len('\n')]
 851
 852     def _write_string(self, message, out=None, only_once=False):
 853         if only_once:
 854             if message in self._printed_messages:
 855                 return
 856             self._printed_messages.add(message)
 857         write_string(message, out=out, encoding=self.params.get('encoding'))
 858
 859     def to_stdout(self, message, skip_eol=False, quiet=None):
 860         """Print message to stdout"""
 861         if quiet is not None:
 862             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
 863                                      'Use "YoutubeDL.to_screen" instead')
 864         if skip_eol is not False:
 865             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
 866                                      'Use "YoutubeDL.to_screen" instead')
 867         self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
 868
 869     def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
 870         """Print message to screen if not in quiet mode"""
 871         if self.params.get('logger'):
 872             self.params['logger'].debug(message)
 873             return
 874         if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
 875             return
 876         self._write_string(
 877             '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 878             self._out_files.screen, only_once=only_once)
 879
 880     def to_stderr(self, message, only_once=False):
 881         """Print message to stderr"""
 882         assert isinstance(message, str)
 883         if self.params.get('logger'):
 884             self.params['logger'].error(message)
 885         else:
 886             self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
 887
 888     def _send_console_code(self, code):
 889         if compat_os_name == 'nt' or not self._out_files.console:
 890             return
 891         self._write_string(code, self._out_files.console)
 892
 893     def to_console_title(self, message):
 894         if not self.params.get('consoletitle', False):
 895             return
 896         message = remove_terminal_sequences(message)
 897         if compat_os_name == 'nt':
 898             if ctypes.windll.kernel32.GetConsoleWindow():
 899                 # c_wchar_p() might not be necessary if `message` is
 900                 # already of type unicode()
 901                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 902         else:
 903             self._send_console_code(f'\033]0;{message}\007')
 904
 905     def save_console_title(self):
 906         if not self.params.get('consoletitle') or self.params.get('simulate'):
 907             return
 908         self._send_console_code('\033[22;0t')  # Save the title on stack
 909
 910     def restore_console_title(self):
 911         if not self.params.get('consoletitle') or self.params.get('simulate'):
 912             return
 913         self._send_console_code('\033[23;0t')  # Restore the title from stack
 914
    def __enter__(self):
        """Enter the context: save the console title and return this object"""
        self.save_console_title()
        return self
 918
 919     def __exit__(self, *args):
 920         self.restore_console_title()
 921
 922         if self.params.get('cookiefile') is not None:
 923             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 924
 925     def trouble(self, message=None, tb=None, is_error=True):
 926         """Determine action to take when a download problem appears.
 927
 928         Depending on if the downloader has been configured to ignore
 929         download errors or not, this method may throw an exception or
 930         not when errors are found, after printing the message.
 931
 932         @param tb          If given, is additional traceback information
 933         @param is_error    Whether to raise error according to ignorerrors
 934         """
 935         if message is not None:
 936             self.to_stderr(message)
 937         if self.params.get('verbose'):
 938             if tb is None:
 939                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 940                     tb = ''
 941                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 942                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 943                     tb += encode_compat_str(traceback.format_exc())
 944                 else:
 945                     tb_data = traceback.format_list(traceback.extract_stack())
 946                     tb = ''.join(tb_data)
 947             if tb:
 948                 self.to_stderr(tb)
 949         if not is_error:
 950             return
 951         if not self.params.get('ignoreerrors'):
 952             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 953                 exc_info = sys.exc_info()[1].exc_info
 954             else:
 955                 exc_info = sys.exc_info()
 956             raise DownloadError(message, exc_info)
 957         self._download_retcode = 1
 958
    # Named text styles. These are what callers pass as the format argument
    # of the _format_* helpers, e.g. _format_err("ERROR:", self.Styles.ERROR)
    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        WARNING='yellow',
        SUPPRESS='light black',
    )
 969
 970     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 971         text = str(text)
 972         if test_encoding:
 973             original_text = text
 974             # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
 975             encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
 976             text = text.encode(encoding, 'ignore').decode(encoding)
 977             if fallback is not None and text != original_text:
 978                 text = fallback
 979         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 980
 981     def _format_out(self, *args, **kwargs):
 982         return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
 983
 984     def _format_screen(self, *args, **kwargs):
 985         return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
 986
 987     def _format_err(self, *args, **kwargs):
 988         return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
 989
 990     def report_warning(self, message, only_once=False):
 991         '''
 992         Print the message to stderr, it will be prefixed with 'WARNING:'
 993         If stderr is a tty file the 'WARNING:' will be colored
 994         '''
 995         if self.params.get('logger') is not None:
 996             self.params['logger'].warning(message)
 997         else:
 998             if self.params.get('no_warnings'):
 999                 return
1000             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
1001
1002     def deprecation_warning(self, message, *, stacklevel=0):
1003         deprecation_warning(
1004             message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)
1005
1006     def deprecated_feature(self, message):
1007         if self.params.get('logger') is not None:
1008             self.params['logger'].warning(f'Deprecated Feature: {message}')
1009         self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
1010
1011     def report_error(self, message, *args, **kwargs):
1012         '''
1013         Do the same as trouble, but prefixes the message with 'ERROR:', colored
1014         in red if stderr is a tty file.
1015         '''
1016         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
1017
1018     def write_debug(self, message, only_once=False):
1019         '''Log debug message or Print message to stderr'''
1020         if not self.params.get('verbose', False):
1021             return
1022         message = f'[debug] {message}'
1023         if self.params.get('logger'):
1024             self.params['logger'].debug(message)
1025         else:
1026             self.to_stderr(message, only_once)
1027
1028     def report_file_already_downloaded(self, file_name):
1029         """Report file has already been fully downloaded."""
1030         try:
1031             self.to_screen('[download] %s has already been downloaded' % file_name)
1032         except UnicodeEncodeError:
1033             self.to_screen('[download] The file has already been downloaded')
1034
1035     def report_file_delete(self, file_name):
1036         """Report that existing file will be deleted."""
1037         try:
1038             self.to_screen('Deleting existing file %s' % file_name)
1039         except UnicodeEncodeError:
1040             self.to_screen('Deleting existing file')
1041
1042     def raise_no_formats(self, info, forced=False, *, msg=None):
1043         has_drm = info.get('_has_drm')
1044         ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
1045         msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
1046         if forced or not ignored:
1047             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
1048                                  expected=has_drm or ignored or expected)
1049         else:
1050             self.report_warning(msg)
1051
    def parse_outtmpl(self):
        """Deprecated: normalize the "outtmpl" param via _parse_outtmpl() and return it"""
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']
1056
1057     def _parse_outtmpl(self):
1058         sanitize = IDENTITY
1059         if self.params.get('restrictfilenames'):  # Remove spaces in the default template
1060             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
1061
1062         outtmpl = self.params.setdefault('outtmpl', {})
1063         if not isinstance(outtmpl, dict):
1064             self.params['outtmpl'] = outtmpl = {'default': outtmpl}
1065         outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})
1066
1067     def get_output_path(self, dir_type='', filename=None):
1068         paths = self.params.get('paths', {})
1069         assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
1070         path = os.path.join(
1071             expand_path(paths.get('home', '').strip()),
1072             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1073             filename or '')
1074         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1075
1076     @staticmethod
1077     def _outtmpl_expandpath(outtmpl):
1078         # expand_path translates '%%' into '%' and '$$' into '$'
1079         # correspondingly that is not what we want since we need to keep
1080         # '%%' intact for template dict substitution step. Working around
1081         # with boundary-alike separator hack.
1082         sep = ''.join(random.choices(string.ascii_letters, k=32))
1083         outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
1084
1085         # outtmpl should be expand_path'ed before template dict substitution
1086         # because meta fields may contain env variables we don't want to
1087         # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
1088         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1089         return expand_path(outtmpl).replace(sep, '')
1090
1091     @staticmethod
1092     def escape_outtmpl(outtmpl):
1093         ''' Escape any remaining strings like %s, %abc% etc. '''
1094         return re.sub(
1095             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1096             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1097             outtmpl)
1098
1099     @classmethod
1100     def validate_outtmpl(cls, outtmpl):
1101         ''' @return None or Exception object '''
1102         outtmpl = re.sub(
1103             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
1104             lambda mobj: f'{mobj.group(0)[:-1]}s',
1105             cls._outtmpl_expandpath(outtmpl))
1106         try:
1107             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1108             return None
1109         except ValueError as err:
1110             return err
1111
1112     @staticmethod
1113     def _copy_infodict(info_dict):
1114         info_dict = dict(info_dict)
1115         info_dict.pop('__postprocessors', None)
1116         info_dict.pop('__pending_error', None)
1117         return info_dict
1118
1119     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1120         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1121         @param sanitize    Whether to sanitize the output as a filename.
1122                            For backward compatibility, a function can also be passed
1123         """
1124
1125         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1126
1127         info_dict = self._copy_infodict(info_dict)
1128         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1129             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1130             if info_dict.get('duration', None) is not None
1131             else None)
1132         info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
1133         info_dict['video_autonumber'] = self._num_videos
1134         if info_dict.get('resolution') is None:
1135             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1136
1137         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1138         # of %(field)s to %(field)0Nd for backward compatibility
1139         field_size_compat_map = {
1140             'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
1141             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1142             'autonumber': self.params.get('autonumber_size') or 5,
1143         }
1144
1145         TMPL_DICT = {}
1146         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
1147         MATH_FUNCTIONS = {
1148             '+': float.__add__,
1149             '-': float.__sub__,
1150         }
1151         # Field is of the form key1.key2...
1152         # where keys (except first) can be string, int, slice or "{field, ...}"
1153         FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
1154         FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
1155             'inner': FIELD_INNER_RE,
1156             'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
1157         }
1158         MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
1159         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1160         INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
1161             (?P<negate>-)?
1162             (?P<fields>{FIELD_RE})
1163             (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
1164             (?:>(?P<strf_format>.+?))?
1165             (?P<remaining>
1166                 (?P<alternate>(?<!\\),[^|&)]+)?
1167                 (?:&(?P<replacement>.*?))?
1168                 (?:\|(?P<default>.*?))?
1169             )$''')
1170
1171         def _traverse_infodict(fields):
1172             fields = [f for x in re.split(r'\.({.+?})\.?', fields)
1173                       for f in ([x] if x.startswith('{') else x.split('.'))]
1174             for i in (0, -1):
1175                 if fields and not fields[i]:
1176                     fields.pop(i)
1177
1178             for i, f in enumerate(fields):
1179                 if not f.startswith('{'):
1180                     continue
1181                 assert f.endswith('}'), f'No closing brace for {f} in {fields}'
1182                 fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
1183
1184             return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
1185
1186         def get_value(mdict):
1187             # Object traversal
1188             value = _traverse_infodict(mdict['fields'])
1189             # Negative
1190             if mdict['negate']:
1191                 value = float_or_none(value)
1192                 if value is not None:
1193                     value *= -1
1194             # Do maths
1195             offset_key = mdict['maths']
1196             if offset_key:
1197                 value = float_or_none(value)
1198                 operator = None
1199                 while offset_key:
1200                     item = re.match(
1201                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1202                         offset_key).group(0)
1203                     offset_key = offset_key[len(item):]
1204                     if operator is None:
1205                         operator = MATH_FUNCTIONS[item]
1206                         continue
1207                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1208                     offset = float_or_none(item)
1209                     if offset is None:
1210                         offset = float_or_none(_traverse_infodict(item))
1211                     try:
1212                         value = operator(value, multiplier * offset)
1213                     except (TypeError, ZeroDivisionError):
1214                         return None
1215                     operator = None
1216             # Datetime formatting
1217             if mdict['strf_format']:
1218                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1219
1220             # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
1221             if sanitize and value == '':
1222                 value = None
1223             return value
1224
1225         na = self.params.get('outtmpl_na_placeholder', 'NA')
1226
1227         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1228             return sanitize_filename(str(value), restricted=restricted, is_id=(
1229                 bool(re.search(r'(^|[_.])id(\.|$)', key))
1230                 if 'filename-sanitization' in self.params['compat_opts']
1231                 else NO_DEFAULT))
1232
1233         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1234         sanitize = bool(sanitize)
1235
1236         def _dumpjson_default(obj):
1237             if isinstance(obj, (set, LazyList)):
1238                 return list(obj)
1239             return repr(obj)
1240
1241         class _ReplacementFormatter(string.Formatter):
1242             def get_field(self, field_name, args, kwargs):
1243                 if field_name.isdigit():
1244                     return args[0], -1
1245                 raise ValueError('Unsupported field')
1246
1247         replacement_formatter = _ReplacementFormatter()
1248
1249         def create_key(outer_mobj):
1250             if not outer_mobj.group('has_key'):
1251                 return outer_mobj.group(0)
1252             key = outer_mobj.group('key')
1253             mobj = re.match(INTERNAL_FORMAT_RE, key)
1254             initial_field = mobj.group('fields') if mobj else ''
1255             value, replacement, default = None, None, na
1256             while mobj:
1257                 mobj = mobj.groupdict()
1258                 default = mobj['default'] if mobj['default'] is not None else default
1259                 value = get_value(mobj)
1260                 replacement = mobj['replacement']
1261                 if value is None and mobj['alternate']:
1262                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
1263                 else:
1264                     break
1265
1266             fmt = outer_mobj.group('format')
1267             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1268                 fmt = f'0{field_size_compat_map[key]:d}d'
1269
1270             if value is None:
1271                 value = default
1272             elif replacement is not None:
1273                 try:
1274                     value = replacement_formatter.format(replacement, value)
1275                 except ValueError:
1276                     value = na
1277
1278             flags = outer_mobj.group('conversion') or ''
1279             str_fmt = f'{fmt[:-1]}s'
1280             if fmt[-1] == 'l':  # list
1281                 delim = '\n' if '#' in flags else ', '
1282                 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1283             elif fmt[-1] == 'j':  # json
1284                 value, fmt = json.dumps(
1285                     value, default=_dumpjson_default,
1286                     indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
1287             elif fmt[-1] == 'h':  # html
1288                 value, fmt = escapeHTML(str(value)), str_fmt
1289             elif fmt[-1] == 'q':  # quoted
1290                 value = map(str, variadic(value) if '#' in flags else [value])
1291                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1292             elif fmt[-1] == 'B':  # bytes
1293                 value = f'%{str_fmt}'.encode() % str(value).encode()
1294                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1295             elif fmt[-1] == 'U':  # unicode normalized
1296                 value, fmt = unicodedata.normalize(
1297                     # "+" = compatibility equivalence, "#" = NFD
1298                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1299                     value), str_fmt
1300             elif fmt[-1] == 'D':  # decimal suffix
1301                 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1302                 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1303                                               factor=1024 if '#' in flags else 1000)
1304             elif fmt[-1] == 'S':  # filename sanitization
1305                 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
1306             elif fmt[-1] == 'c':
1307                 if value:
1308                     value = str(value)[0]
1309                 else:
1310                     fmt = str_fmt
1311             elif fmt[-1] not in 'rs':  # numeric
1312                 value = float_or_none(value)
1313                 if value is None:
1314                     value, fmt = default, 's'
1315
1316             if sanitize:
1317                 if fmt[-1] == 'r':
1318                     # If value is an object, sanitize might convert it to a string
1319                     # So we convert it to repr first
1320                     value, fmt = repr(value), str_fmt
1321                 if fmt[-1] in 'csr':
1322                     value = sanitizer(initial_field, value)
1323
1324             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1325             TMPL_DICT[key] = value
1326             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1327
1328         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1329
1330     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1331         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1332         return self.escape_outtmpl(outtmpl) % info_dict
1333
1334     def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1335         assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1336         if outtmpl is None:
1337             outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1338         try:
1339             outtmpl = self._outtmpl_expandpath(outtmpl)
1340             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1341             if not filename:
1342                 return None
1343
1344             if tmpl_type in ('', 'temp'):
1345                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1346                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1347                     filename = replace_extension(filename, ext, final_ext)
1348             elif tmpl_type:
1349                 force_ext = OUTTMPL_TYPES[tmpl_type]
1350                 if force_ext:
1351                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1352
1353             # https://github.com/blackjack4494/youtube-dlc/issues/85
1354             trim_file_name = self.params.get('trim_file_name', False)
1355             if trim_file_name:
1356                 no_ext, *ext = filename.rsplit('.', 2)
1357                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1358
1359             return filename
1360         except ValueError as err:
1361             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1362             return None
1363
1364     def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1365         """Generate the output filename"""
1366         if outtmpl:
1367             assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1368             dir_type = None
1369         filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1370         if not filename and dir_type not in ('', 'temp'):
1371             return ''
1372
1373         if warn:
1374             if not self.params.get('paths'):
1375                 pass
1376             elif filename == '-':
1377                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1378             elif os.path.isabs(filename):
1379                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1380         if filename == '-' or not filename:
1381             return filename
1382
1383         return self.get_output_path(dir_type, filename)
1384
1385     def _match_entry(self, info_dict, incomplete=False, silent=False):
1386         """Returns None if the file should be downloaded"""
1387         _type = info_dict.get('_type', 'video')
1388         assert incomplete or _type == 'video', 'Only video result can be considered complete'
1389
1390         video_title = info_dict.get('title', info_dict.get('id', 'entry'))
1391
1392         def check_filter():
1393             if _type in ('playlist', 'multi_video'):
1394                 return
1395             elif _type in ('url', 'url_transparent') and not try_call(
1396                     lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1397                 return
1398
1399             if 'title' in info_dict:
1400                 # This can happen when we're just evaluating the playlist
1401                 title = info_dict['title']
1402                 matchtitle = self.params.get('matchtitle', False)
1403                 if matchtitle:
1404                     if not re.search(matchtitle, title, re.IGNORECASE):
1405                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1406                 rejecttitle = self.params.get('rejecttitle', False)
1407                 if rejecttitle:
1408                     if re.search(rejecttitle, title, re.IGNORECASE):
1409                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1410
1411             date = info_dict.get('upload_date')
1412             if date is not None:
1413                 dateRange = self.params.get('daterange', DateRange())
1414                 if date not in dateRange:
1415                     return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1416             view_count = info_dict.get('view_count')
1417             if view_count is not None:
1418                 min_views = self.params.get('min_views')
1419                 if min_views is not None and view_count < min_views:
1420                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1421                 max_views = self.params.get('max_views')
1422                 if max_views is not None and view_count > max_views:
1423                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1424             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1425                 return 'Skipping "%s" because it is age restricted' % video_title
1426
1427             match_filter = self.params.get('match_filter')
1428             if match_filter is None:
1429                 return None
1430
1431             cancelled = None
1432             try:
1433                 try:
1434                     ret = match_filter(info_dict, incomplete=incomplete)
1435                 except TypeError:
1436                     # For backward compatibility
1437                     ret = None if incomplete else match_filter(info_dict)
1438             except DownloadCancelled as err:
1439                 if err.msg is not NO_DEFAULT:
1440                     raise
1441                 ret, cancelled = err.msg, err
1442
1443             if ret is NO_DEFAULT:
1444                 while True:
1445                     filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1446                     reply = input(self._format_screen(
1447                         f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1448                     if reply in {'y', ''}:
1449                         return None
1450                     elif reply == 'n':
1451                         if cancelled:
1452                             raise type(cancelled)(f'Skipping {video_title}')
1453                         return f'Skipping {video_title}'
1454             return ret
1455
1456         if self.in_download_archive(info_dict):
1457             reason = '%s has already been recorded in the archive' % video_title
1458             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1459         else:
1460             try:
1461                 reason = check_filter()
1462             except DownloadCancelled as e:
1463                 reason, break_opt, break_err = e.msg, 'match_filter', type(e)
1464             else:
1465                 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1466         if reason is not None:
1467             if not silent:
1468                 self.to_screen('[download] ' + reason)
1469             if self.params.get(break_opt, False):
1470                 raise break_err()
1471         return reason
1472
1473     @staticmethod
1474     def add_extra_info(info_dict, extra_info):
1475         '''Set the keys from extra_info in info dict if they are missing'''
1476         for key, value in extra_info.items():
1477             info_dict.setdefault(key, value)
1478
1479     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1480                      process=True, force_generic_extractor=False):
1481         """
1482         Extract and return the information dictionary of the URL
1483
1484         Arguments:
1485         @param url          URL to extract
1486
1487         Keyword arguments:
1488         @param download     Whether to download videos
1489         @param process      Whether to resolve all unresolved references (URLs, playlist items).
1490                             Must be True for download to work
1491         @param ie_key       Use only the extractor with this key
1492
1493         @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
1494         @force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')
1495         """
1496
1497         if extra_info is None:
1498             extra_info = {}
1499
1500         if not ie_key and force_generic_extractor:
1501             ie_key = 'Generic'
1502
1503         if ie_key:
1504             ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1505         else:
1506             ies = self._ies
1507
1508         for key, ie in ies.items():
1509             if not ie.suitable(url):
1510                 continue
1511
1512             if not ie.working():
1513                 self.report_warning('The program functionality for this site has been marked as broken, '
1514                                     'and will probably not work.')
1515
1516             temp_id = ie.get_temp_id(url)
1517             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1518                 self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
1519                 if self.params.get('break_on_existing', False):
1520                     raise ExistingVideoReached()
1521                 break
1522             return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1523         else:
1524             extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1525             self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1526                               tb=False if extractors_restricted else None)
1527
1528     def _handle_extraction_exceptions(func):
1529         @functools.wraps(func)
1530         def wrapper(self, *args, **kwargs):
1531             while True:
1532                 try:
1533                     return func(self, *args, **kwargs)
1534                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1535                     raise
1536                 except ReExtractInfo as e:
1537                     if e.expected:
1538                         self.to_screen(f'{e}; Re-extracting data')
1539                     else:
1540                         self.to_stderr('\r')
1541                         self.report_warning(f'{e}; Re-extracting data')
1542                     continue
1543                 except GeoRestrictedError as e:
1544                     msg = e.msg
1545                     if e.countries:
1546                         msg += '\nThis video is available in %s.' % ', '.join(
1547                             map(ISO3166Utils.short2full, e.countries))
1548                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1549                     self.report_error(msg)
1550                 except ExtractorError as e:  # An error we somewhat expected
1551                     self.report_error(str(e), e.format_traceback())
1552                 except Exception as e:
1553                     if self.params.get('ignoreerrors'):
1554                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1555                     else:
1556                         raise
1557                 break
1558         return wrapper
1559
1560     def _wait_for_video(self, ie_result={}):
1561         if (not self.params.get('wait_for_video')
1562                 or ie_result.get('_type', 'video') != 'video'
1563                 or ie_result.get('formats') or ie_result.get('url')):
1564             return
1565
1566         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1567         last_msg = ''
1568
1569         def progress(msg):
1570             nonlocal last_msg
1571             full_msg = f'{msg}\n'
1572             if not self.params.get('noprogress'):
1573                 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1574             elif last_msg:
1575                 return
1576             self.to_screen(full_msg, skip_eol=True)
1577             last_msg = msg
1578
1579         min_wait, max_wait = self.params.get('wait_for_video')
1580         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1581         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1582             diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1583             self.report_warning('Release time of video is not known')
1584         elif ie_result and (diff or 0) <= 0:
1585             self.report_warning('Video should already be available according to extracted info')
1586         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1587         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1588
1589         wait_till = time.time() + diff
1590         try:
1591             while True:
1592                 diff = wait_till - time.time()
1593                 if diff <= 0:
1594                     progress('')
1595                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1596                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1597                 time.sleep(1)
1598         except KeyboardInterrupt:
1599             progress('')
1600             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1601         except BaseException as e:
1602             if not isinstance(e, ReExtractInfo):
1603                 self.to_screen('')
1604             raise
1605
1606     @_handle_extraction_exceptions
1607     def __extract_info(self, url, ie, download, extra_info, process):
1608         try:
1609             ie_result = ie.extract(url)
1610         except UserNotLive as e:
1611             if process:
1612                 if self.params.get('wait_for_video'):
1613                     self.report_warning(e)
1614                 self._wait_for_video()
1615             raise
1616         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1617             self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1618             return
1619         if isinstance(ie_result, list):
1620             # Backwards compatibility: old IE result format
1621             ie_result = {
1622                 '_type': 'compat_list',
1623                 'entries': ie_result,
1624             }
1625         if extra_info.get('original_url'):
1626             ie_result.setdefault('original_url', extra_info['original_url'])
1627         self.add_default_extra_info(ie_result, ie, url)
1628         if process:
1629             self._wait_for_video(ie_result)
1630             return self.process_ie_result(ie_result, download, extra_info)
1631         else:
1632             return ie_result
1633
1634     def add_default_extra_info(self, ie_result, ie, url):
1635         if url is not None:
1636             self.add_extra_info(ie_result, {
1637                 'webpage_url': url,
1638                 'original_url': url,
1639             })
1640         webpage_url = ie_result.get('webpage_url')
1641         if webpage_url:
1642             self.add_extra_info(ie_result, {
1643                 'webpage_url_basename': url_basename(webpage_url),
1644                 'webpage_url_domain': get_domain(webpage_url),
1645             })
1646         if ie is not None:
1647             self.add_extra_info(ie_result, {
1648                 'extractor': ie.IE_NAME,
1649                 'extractor_key': ie.ie_key(),
1650             })
1651
1652     def process_ie_result(self, ie_result, download=True, extra_info=None):
1653         """
1654         Take the result of the ie(may be modified) and resolve all unresolved
1655         references (URLs, playlist items).
1656
1657         It will also download the videos if 'download'.
1658         Returns the resolved ie_result.
1659         """
1660         if extra_info is None:
1661             extra_info = {}
1662         result_type = ie_result.get('_type', 'video')
1663
1664         if result_type in ('url', 'url_transparent'):
1665             ie_result['url'] = sanitize_url(
1666                 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1667             if ie_result.get('original_url') and not extra_info.get('original_url'):
1668                 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1669
1670             extract_flat = self.params.get('extract_flat', False)
1671             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1672                     or extract_flat is True):
1673                 info_copy = ie_result.copy()
1674                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1675                 if ie and not ie_result.get('id'):
1676                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1677                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1678                 self.add_extra_info(info_copy, extra_info)
1679                 info_copy, _ = self.pre_process(info_copy)
1680                 self._fill_common_fields(info_copy, False)
1681                 self.__forced_printings(info_copy)
1682                 self._raise_pending_errors(info_copy)
1683                 if self.params.get('force_write_download_archive', False):
1684                     self.record_download_archive(info_copy)
1685                 return ie_result
1686
1687         if result_type == 'video':
1688             self.add_extra_info(ie_result, extra_info)
1689             ie_result = self.process_video_result(ie_result, download=download)
1690             self._raise_pending_errors(ie_result)
1691             additional_urls = (ie_result or {}).get('additional_urls')
1692             if additional_urls:
1693                 # TODO: Improve MetadataParserPP to allow setting a list
1694                 if isinstance(additional_urls, str):
1695                     additional_urls = [additional_urls]
1696                 self.to_screen(
1697                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1698                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1699                 ie_result['additional_entries'] = [
1700                     self.extract_info(
1701                         url, download, extra_info=extra_info,
1702                         force_generic_extractor=self.params.get('force_generic_extractor'))
1703                     for url in additional_urls
1704                 ]
1705             return ie_result
1706         elif result_type == 'url':
1707             # We have to add extra_info to the results because it may be
1708             # contained in a playlist
1709             return self.extract_info(
1710                 ie_result['url'], download,
1711                 ie_key=ie_result.get('ie_key'),
1712                 extra_info=extra_info)
1713         elif result_type == 'url_transparent':
1714             # Use the information from the embedding page
1715             info = self.extract_info(
1716                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1717                 extra_info=extra_info, download=False, process=False)
1718
1719             # extract_info may return None when ignoreerrors is enabled and
1720             # extraction failed with an error, don't crash and return early
1721             # in this case
1722             if not info:
1723                 return info
1724
1725             exempted_fields = {'_type', 'url', 'ie_key'}
1726             if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1727                 # For video clips, the id etc of the clip extractor should be used
1728                 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1729
1730             new_result = info.copy()
1731             new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1732
1733             # Extracted info may not be a video result (i.e.
1734             # info.get('_type', 'video') != video) but rather an url or
1735             # url_transparent. In such cases outer metadata (from ie_result)
1736             # should be propagated to inner one (info). For this to happen
1737             # _type of info should be overridden with url_transparent. This
1738             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1739             if new_result.get('_type') == 'url':
1740                 new_result['_type'] = 'url_transparent'
1741
1742             return self.process_ie_result(
1743                 new_result, download=download, extra_info=extra_info)
1744         elif result_type in ('playlist', 'multi_video'):
1745             # Protect from infinite recursion due to recursively nested playlists
1746             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1747             webpage_url = ie_result.get('webpage_url')  # Playlists maynot have webpage_url
1748             if webpage_url and webpage_url in self._playlist_urls:
1749                 self.to_screen(
1750                     '[download] Skipping already downloaded playlist: %s'
1751                     % ie_result.get('title') or ie_result.get('id'))
1752                 return
1753
1754             self._playlist_level += 1
1755             self._playlist_urls.add(webpage_url)
1756             self._fill_common_fields(ie_result, False)
1757             self._sanitize_thumbnails(ie_result)
1758             try:
1759                 return self.__process_playlist(ie_result, download)
1760             finally:
1761                 self._playlist_level -= 1
1762                 if not self._playlist_level:
1763                     self._playlist_urls.clear()
1764         elif result_type == 'compat_list':
1765             self.report_warning(
1766                 'Extractor %s returned a compat_list result. '
1767                 'It needs to be updated.' % ie_result.get('extractor'))
1768
1769             def _fixup(r):
1770                 self.add_extra_info(r, {
1771                     'extractor': ie_result['extractor'],
1772                     'webpage_url': ie_result['webpage_url'],
1773                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1774                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1775                     'extractor_key': ie_result['extractor_key'],
1776                 })
1777                 return r
1778             ie_result['entries'] = [
1779                 self.process_ie_result(_fixup(r), download, extra_info)
1780                 for r in ie_result['entries']
1781             ]
1782             return ie_result
1783         else:
1784             raise Exception('Invalid result type: %s' % result_type)
1785
1786     def _ensure_dir_exists(self, path):
1787         return make_dir(path, self.report_error)
1788
1789     @staticmethod
1790     def _playlist_infodict(ie_result, strict=False, **kwargs):
1791         info = {
1792             'playlist_count': ie_result.get('playlist_count'),
1793             'playlist': ie_result.get('title') or ie_result.get('id'),
1794             'playlist_id': ie_result.get('id'),
1795             'playlist_title': ie_result.get('title'),
1796             'playlist_uploader': ie_result.get('uploader'),
1797             'playlist_uploader_id': ie_result.get('uploader_id'),
1798             **kwargs,
1799         }
1800         if strict:
1801             return info
1802         if ie_result.get('webpage_url'):
1803             info.update({
1804                 'webpage_url': ie_result['webpage_url'],
1805                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1806                 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1807             })
1808         return {
1809             **info,
1810             'playlist_index': 0,
1811             '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1812             'extractor': ie_result['extractor'],
1813             'extractor_key': ie_result['extractor_key'],
1814         }
1815
    def __process_playlist(self, ie_result, download):
        """Process each entry in the playlist

        @param ie_result    Info dict whose '_type' is 'playlist' or 'multi_video'.
                            It is updated in place ('entries', 'requested_entries',
                            'playlist_count').
        @param download     Forwarded to the per-entry processing
        @returns            The result of running the 'playlist' postprocessors on
                            ie_result, or None when the playlist is rejected by
                            _match_entry or when writing the playlist info-json /
                            description reports None
        """
        assert ie_result['_type'] in ('playlist', 'multi_video')

        # Playlist-level fields only (strict=True); also used to filter the playlist as a whole
        common_info = self._playlist_infodict(ie_result, strict=True)
        title = common_info.get('playlist') or '<Untitled>'
        if self._match_entry(common_info, incomplete=True) is not None:
            return
        self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

        all_entries = PlaylistEntries(self, ie_result)
        entries = orderedSet(all_entries.get_requested_items(), lazy=True)

        lazy = self.params.get('lazy_playlist')
        if lazy:
            # Entries are consumed one by one in the loop below; the count is unknown here
            resolved_entries, n_entries = [], 'N/A'
            ie_result['requested_entries'], ie_result['entries'] = None, None
        else:
            # NB: `entries` and `resolved_entries` are the very same list object from here on,
            # so reordering one reorders the other and the loop below overwrites items in place
            entries = resolved_entries = list(entries)
            n_entries = len(resolved_entries)
            ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
        if not ie_result.get('playlist_count'):
            # Better to do this after potentially exhausting entries
            ie_result['playlist_count'] = all_entries.get_full_count()

        extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
        # Lookups hit ie_result first and fall back to the derived playlist fields
        ie_copy = collections.ChainMap(ie_result, extra)

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            # A None result from the writers aborts processing of this playlist
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if lazy:
            if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
                self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
        elif self.params.get('playlistreverse'):
            entries.reverse()
        elif self.params.get('playlistrandom'):
            random.shuffle(entries)

        self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
                       f'{format_field(ie_result, "playlist_count", " of %s")}')

        # extract_flat == 'discard' never keeps processed entries;
        # 'discard_in_playlist' keeps them only for non-'playlist' types (i.e. multi_video)
        keep_resolved_entries = self.params.get('extract_flat') != 'discard'
        if self.params.get('extract_flat') == 'discard_in_playlist':
            keep_resolved_entries = ie_result['_type'] != 'playlist'
        if keep_resolved_entries:
            self.write_debug('The information of all playlist entries will be held in memory')

        failures = 0
        # A falsy setting (unset or 0) means there is no failure limit
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, (playlist_index, entry) in enumerate(entries):
            if lazy:
                # Appended before any `continue` so that resolved_entries[i] is always valid below
                resolved_entries.append((playlist_index, entry))
            if not entry:
                continue

            entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
            if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
                playlist_index = ie_result['requested_entries'][i]

            entry_copy = collections.ChainMap(entry, {
                **common_info,
                'n_entries': int_or_none(n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            })

            if self._match_entry(entry_copy, incomplete=True) is not None:
                # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
                # NO_DEFAULT marks the entry as filtered out; such entries are dropped after the loop
                resolved_entries[i] = (playlist_index, NO_DEFAULT)
                continue

            self.to_screen('[download] Downloading item %s of %s' % (
                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

            entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
            }, extra))
            # A falsy result is counted as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
                break
            if keep_resolved_entries:
                resolved_entries[i] = (playlist_index, entry_result)

        # Update with processed data
        ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
        ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
        if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
            # Do not set for full playlist
            ie_result.pop('requested_entries')

        # Write the updated info to json
        if _infojson_written is True and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {title}')
        return ie_result
1932
1933     @_handle_extraction_exceptions
1934     def __process_iterable_entry(self, entry, download, extra_info):
1935         return self.process_ie_result(
1936             entry, download=download, extra_info=extra_info)
1937
1938     def _build_format_filter(self, filter_spec):
1939         " Returns a function to filter the formats according to the filter_spec "
1940
1941         OPERATORS = {
1942             '<': operator.lt,
1943             '<=': operator.le,
1944             '>': operator.gt,
1945             '>=': operator.ge,
1946             '=': operator.eq,
1947             '!=': operator.ne,
1948         }
1949         operator_rex = re.compile(r'''(?x)\s*
1950             (?P<key>[\w.-]+)\s*
1951             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1952             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1953             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1954         m = operator_rex.fullmatch(filter_spec)
1955         if m:
1956             try:
1957                 comparison_value = int(m.group('value'))
1958             except ValueError:
1959                 comparison_value = parse_filesize(m.group('value'))
1960                 if comparison_value is None:
1961                     comparison_value = parse_filesize(m.group('value') + 'B')
1962                 if comparison_value is None:
1963                     raise ValueError(
1964                         'Invalid value %r in format specification %r' % (
1965                             m.group('value'), filter_spec))
1966             op = OPERATORS[m.group('op')]
1967
1968         if not m:
1969             STR_OPERATORS = {
1970                 '=': operator.eq,
1971                 '^=': lambda attr, value: attr.startswith(value),
1972                 '$=': lambda attr, value: attr.endswith(value),
1973                 '*=': lambda attr, value: value in attr,
1974                 '~=': lambda attr, value: value.search(attr) is not None
1975             }
1976             str_operator_rex = re.compile(r'''(?x)\s*
1977                 (?P<key>[a-zA-Z0-9._-]+)\s*
1978                 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1979                 (?P<quote>["'])?
1980                 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1981                 (?(quote)(?P=quote))\s*
1982                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1983             m = str_operator_rex.fullmatch(filter_spec)
1984             if m:
1985                 if m.group('op') == '~=':
1986                     comparison_value = re.compile(m.group('value'))
1987                 else:
1988                     comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1989                 str_op = STR_OPERATORS[m.group('op')]
1990                 if m.group('negation'):
1991                     op = lambda attr, value: not str_op(attr, value)
1992                 else:
1993                     op = str_op
1994
1995         if not m:
1996             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1997
1998         def _filter(f):
1999             actual_value = f.get(m.group('key'))
2000             if actual_value is None:
2001                 return m.group('none_inclusive')
2002             return op(actual_value, comparison_value)
2003         return _filter
2004
2005     def _check_formats(self, formats):
2006         for f in formats:
2007             self.to_screen('[info] Testing format %s' % f['format_id'])
2008             path = self.get_output_path('temp')
2009             if not self._ensure_dir_exists(f'{path}/'):
2010                 continue
2011             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2012             temp_file.close()
2013             try:
2014                 success, _ = self.dl(temp_file.name, f, test=True)
2015             except (DownloadError, OSError, ValueError) + network_exceptions:
2016                 success = False
2017             finally:
2018                 if os.path.exists(temp_file.name):
2019                     try:
2020                         os.remove(temp_file.name)
2021                     except OSError:
2022                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2023             if success:
2024                 yield f
2025             else:
2026                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2027
2028     def _default_format_spec(self, info_dict, download=True):
2029
2030         def can_merge():
2031             merger = FFmpegMergerPP(self)
2032             return merger.available and merger.can_merge()
2033
2034         prefer_best = (
2035             not self.params.get('simulate')
2036             and download
2037             and (
2038                 not can_merge()
2039                 or info_dict.get('is_live') and not self.params.get('live_from_start')
2040                 or self.params['outtmpl']['default'] == '-'))
2041         compat = (
2042             prefer_best
2043             or self.params.get('allow_multiple_audio_streams', False)
2044             or 'format-spec' in self.params['compat_opts'])
2045
2046         return (
2047             'best/bestvideo+bestaudio' if prefer_best
2048             else 'bestvideo*+bestaudio/best' if not compat
2049             else 'bestvideo+bestaudio/best')
2050
2051     def build_format_selector(self, format_spec):
2052         def syntax_error(note, start):
2053             message = (
2054                 'Invalid format specification: '
2055                 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2056             return SyntaxError(message)
2057
2058         PICKFIRST = 'PICKFIRST'
2059         MERGE = 'MERGE'
2060         SINGLE = 'SINGLE'
2061         GROUP = 'GROUP'
2062         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2063
2064         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2065                                   'video': self.params.get('allow_multiple_video_streams', False)}
2066
2067         check_formats = self.params.get('check_formats') == 'selected'
2068
2069         def _parse_filter(tokens):
2070             filter_parts = []
2071             for type, string_, start, _, _ in tokens:
2072                 if type == tokenize.OP and string_ == ']':
2073                     return ''.join(filter_parts)
2074                 else:
2075                     filter_parts.append(string_)
2076
2077         def _remove_unused_ops(tokens):
2078             # Remove operators that we don't use and join them with the surrounding strings.
2079             # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2080             ALLOWED_OPS = ('/', '+', ',', '(', ')')
2081             last_string, last_start, last_end, last_line = None, None, None, None
2082             for type, string_, start, end, line in tokens:
2083                 if type == tokenize.OP and string_ == '[':
2084                     if last_string:
2085                         yield tokenize.NAME, last_string, last_start, last_end, last_line
2086                         last_string = None
2087                     yield type, string_, start, end, line
2088                     # everything inside brackets will be handled by _parse_filter
2089                     for type, string_, start, end, line in tokens:
2090                         yield type, string_, start, end, line
2091                         if type == tokenize.OP and string_ == ']':
2092                             break
2093                 elif type == tokenize.OP and string_ in ALLOWED_OPS:
2094                     if last_string:
2095                         yield tokenize.NAME, last_string, last_start, last_end, last_line
2096                         last_string = None
2097                     yield type, string_, start, end, line
2098                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2099                     if not last_string:
2100                         last_string = string_
2101                         last_start = start
2102                         last_end = end
2103                     else:
2104                         last_string += string_
2105             if last_string:
2106                 yield tokenize.NAME, last_string, last_start, last_end, last_line
2107
2108         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2109             selectors = []
2110             current_selector = None
2111             for type, string_, start, _, _ in tokens:
2112                 # ENCODING is only defined in python 3.x
2113                 if type == getattr(tokenize, 'ENCODING', None):
2114                     continue
2115                 elif type in [tokenize.NAME, tokenize.NUMBER]:
2116                     current_selector = FormatSelector(SINGLE, string_, [])
2117                 elif type == tokenize.OP:
2118                     if string_ == ')':
2119                         if not inside_group:
2120                             # ')' will be handled by the parentheses group
2121                             tokens.restore_last_token()
2122                         break
2123                     elif inside_merge and string_ in ['/', ',']:
2124                         tokens.restore_last_token()
2125                         break
2126                     elif inside_choice and string_ == ',':
2127                         tokens.restore_last_token()
2128                         break
2129                     elif string_ == ',':
2130                         if not current_selector:
2131                             raise syntax_error('"," must follow a format selector', start)
2132                         selectors.append(current_selector)
2133                         current_selector = None
2134                     elif string_ == '/':
2135                         if not current_selector:
2136                             raise syntax_error('"/" must follow a format selector', start)
2137                         first_choice = current_selector
2138                         second_choice = _parse_format_selection(tokens, inside_choice=True)
2139                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2140                     elif string_ == '[':
2141                         if not current_selector:
2142                             current_selector = FormatSelector(SINGLE, 'best', [])
2143                         format_filter = _parse_filter(tokens)
2144                         current_selector.filters.append(format_filter)
2145                     elif string_ == '(':
2146                         if current_selector:
2147                             raise syntax_error('Unexpected "("', start)
2148                         group = _parse_format_selection(tokens, inside_group=True)
2149                         current_selector = FormatSelector(GROUP, group, [])
2150                     elif string_ == '+':
2151                         if not current_selector:
2152                             raise syntax_error('Unexpected "+"', start)
2153                         selector_1 = current_selector
2154                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2155                         if not selector_2:
2156                             raise syntax_error('Expected a selector', start)
2157                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2158                     else:
2159                         raise syntax_error(f'Operator not recognized: "{string_}"', start)
2160                 elif type == tokenize.ENDMARKER:
2161                     break
2162             if current_selector:
2163                 selectors.append(current_selector)
2164             return selectors
2165
2166         def _merge(formats_pair):
2167             format_1, format_2 = formats_pair
2168
2169             formats_info = []
2170             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2171             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2172
2173             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2174                 get_no_more = {'video': False, 'audio': False}
2175                 for (i, fmt_info) in enumerate(formats_info):
2176                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2177                         formats_info.pop(i)
2178                         continue
2179                     for aud_vid in ['audio', 'video']:
2180                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2181                             if get_no_more[aud_vid]:
2182                                 formats_info.pop(i)
2183                                 break
2184                             get_no_more[aud_vid] = True
2185
2186             if len(formats_info) == 1:
2187                 return formats_info[0]
2188
2189             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2190             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2191
2192             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2193             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2194
2195             output_ext = get_compatible_ext(
2196                 vcodecs=[f.get('vcodec') for f in video_fmts],
2197                 acodecs=[f.get('acodec') for f in audio_fmts],
2198                 vexts=[f['ext'] for f in video_fmts],
2199                 aexts=[f['ext'] for f in audio_fmts],
2200                 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2201                              or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2202
2203             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2204
2205             new_dict = {
2206                 'requested_formats': formats_info,
2207                 'format': '+'.join(filtered('format')),
2208                 'format_id': '+'.join(filtered('format_id')),
2209                 'ext': output_ext,
2210                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2211                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2212                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2213                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2214                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2215             }
2216
2217             if the_only_video:
2218                 new_dict.update({
2219                     'width': the_only_video.get('width'),
2220                     'height': the_only_video.get('height'),
2221                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2222                     'fps': the_only_video.get('fps'),
2223                     'dynamic_range': the_only_video.get('dynamic_range'),
2224                     'vcodec': the_only_video.get('vcodec'),
2225                     'vbr': the_only_video.get('vbr'),
2226                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2227                     'aspect_ratio': the_only_video.get('aspect_ratio'),
2228                 })
2229
2230             if the_only_audio:
2231                 new_dict.update({
2232                     'acodec': the_only_audio.get('acodec'),
2233                     'abr': the_only_audio.get('abr'),
2234                     'asr': the_only_audio.get('asr'),
2235                     'audio_channels': the_only_audio.get('audio_channels')
2236                 })
2237
2238             return new_dict
2239
2240         def _check_formats(formats):
2241             if not check_formats:
2242                 yield from formats
2243                 return
2244             yield from self._check_formats(formats)
2245
2246         def _build_selector_function(selector):
2247             if isinstance(selector, list):  # ,
2248                 fs = [_build_selector_function(s) for s in selector]
2249
2250                 def selector_function(ctx):
2251                     for f in fs:
2252                         yield from f(ctx)
2253                 return selector_function
2254
2255             elif selector.type == GROUP:  # ()
2256                 selector_function = _build_selector_function(selector.selector)
2257
2258             elif selector.type == PICKFIRST:  # /
2259                 fs = [_build_selector_function(s) for s in selector.selector]
2260
2261                 def selector_function(ctx):
2262                     for f in fs:
2263                         picked_formats = list(f(ctx))
2264                         if picked_formats:
2265                             return picked_formats
2266                     return []
2267
2268             elif selector.type == MERGE:  # +
2269                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2270
2271                 def selector_function(ctx):
2272                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2273                         yield _merge(pair)
2274
2275             elif selector.type == SINGLE:  # atom
2276                 format_spec = selector.selector or 'best'
2277
2278                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2279                 if format_spec == 'all':
2280                     def selector_function(ctx):
2281                         yield from _check_formats(ctx['formats'][::-1])
2282                 elif format_spec == 'mergeall':
2283                     def selector_function(ctx):
2284                         formats = list(_check_formats(
2285                             f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2286                         if not formats:
2287                             return
2288                         merged_format = formats[-1]
2289                         for f in formats[-2::-1]:
2290                             merged_format = _merge((merged_format, f))
2291                         yield merged_format
2292
2293                 else:
2294                     format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
2295                     mobj = re.match(
2296                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2297                         format_spec)
2298                     if mobj is not None:
2299                         format_idx = int_or_none(mobj.group('n'), default=1)
2300                         format_reverse = mobj.group('bw')[0] == 'b'
2301                         format_type = (mobj.group('type') or [None])[0]
2302                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2303                         format_modified = mobj.group('mod') is not None
2304
2305                         format_fallback = not format_type and not format_modified  # for b, w
2306                         _filter_f = (
2307                             (lambda f: f.get('%scodec' % format_type) != 'none')
2308                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2309                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2310                             if format_type  # bv, ba, wv, wa
2311                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2312                             if not format_modified  # b, w
2313                             else lambda f: True)  # b*, w*
2314                         filter_f = lambda f: _filter_f(f) and (
2315                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2316                     else:
2317                         if format_spec in self._format_selection_exts['audio']:
2318                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2319                         elif format_spec in self._format_selection_exts['video']:
2320                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2321                             seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2322                         elif format_spec in self._format_selection_exts['storyboards']:
2323                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2324                         else:
2325                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2326
2327                     def selector_function(ctx):
2328                         formats = list(ctx['formats'])
2329                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2330                         if not matches:
2331                             if format_fallback and ctx['incomplete_formats']:
2332                                 # for extractors with incomplete formats (audio only (soundcloud)
2333                                 # or video only (imgur)) best/worst will fallback to
2334                                 # best/worst {video,audio}-only format
2335                                 matches = formats
2336                             elif seperate_fallback and not ctx['has_merged_format']:
2337                                 # for compatibility with youtube-dl when there is no pre-merged format
2338                                 matches = list(filter(seperate_fallback, formats))
2339                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2340                         try:
2341                             yield matches[format_idx - 1]
2342                         except LazyList.IndexError:
2343                             return
2344
2345             filters = [self._build_format_filter(f) for f in selector.filters]
2346
2347             def final_selector(ctx):
2348                 ctx_copy = dict(ctx)
2349                 for _filter in filters:
2350                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2351                 return selector_function(ctx_copy)
2352             return final_selector
2353
2354         stream = io.BytesIO(format_spec.encode())
2355         try:
2356             tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2357         except tokenize.TokenError:
2358             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2359
2360         class TokenIterator:
2361             def __init__(self, tokens):
2362                 self.tokens = tokens
2363                 self.counter = 0
2364
2365             def __iter__(self):
2366                 return self
2367
2368             def __next__(self):
2369                 if self.counter >= len(self.tokens):
2370                     raise StopIteration()
2371                 value = self.tokens[self.counter]
2372                 self.counter += 1
2373                 return value
2374
2375             next = __next__
2376
2377             def restore_last_token(self):
2378                 self.counter -= 1
2379
2380         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2381         return _build_selector_function(parsed_selector)
2382
2383     def _calc_headers(self, info_dict):
2384         res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2385
2386         cookies = self._calc_cookies(info_dict['url'])
2387         if cookies:
2388             res['Cookie'] = cookies
2389
2390         if 'X-Forwarded-For' not in res:
2391             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2392             if x_forwarded_for_ip:
2393                 res['X-Forwarded-For'] = x_forwarded_for_ip
2394
2395         return res
2396
2397     def _calc_cookies(self, url):
2398         pr = sanitized_Request(url)
2399         self.cookiejar.add_cookie_header(pr)
2400         return pr.get_header('Cookie')
2401
2402     def _sort_thumbnails(self, thumbnails):
2403         thumbnails.sort(key=lambda t: (
2404             t.get('preference') if t.get('preference') is not None else -1,
2405             t.get('width') if t.get('width') is not None else -1,
2406             t.get('height') if t.get('height') is not None else -1,
2407             t.get('id') if t.get('id') is not None else '',
2408             t.get('url')))
2409
2410     def _sanitize_thumbnails(self, info_dict):
2411         thumbnails = info_dict.get('thumbnails')
2412         if thumbnails is None:
2413             thumbnail = info_dict.get('thumbnail')
2414             if thumbnail:
2415                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2416         if not thumbnails:
2417             return
2418
2419         def check_thumbnails(thumbnails):
2420             for t in thumbnails:
2421                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2422                 try:
2423                     self.urlopen(HEADRequest(t['url']))
2424                 except network_exceptions as err:
2425                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2426                     continue
2427                 yield t
2428
2429         self._sort_thumbnails(thumbnails)
2430         for i, t in enumerate(thumbnails):
2431             if t.get('id') is None:
2432                 t['id'] = '%d' % i
2433             if t.get('width') and t.get('height'):
2434                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2435             t['url'] = sanitize_url(t['url'])
2436
2437         if self.params.get('check_formats') is True:
2438             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2439         else:
2440             info_dict['thumbnails'] = thumbnails
2441
2442     def _fill_common_fields(self, info_dict, final=True):
2443         # TODO: move sanitization here
2444         if final:
2445             title = info_dict['fulltitle'] = info_dict.get('title')
2446             if not title:
2447                 if title == '':
2448                     self.write_debug('Extractor gave empty title. Creating a generic title')
2449                 else:
2450                     self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2451                 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2452
2453         if info_dict.get('duration') is not None:
2454             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2455
2456         for ts_key, date_key in (
2457                 ('timestamp', 'upload_date'),
2458                 ('release_timestamp', 'release_date'),
2459                 ('modified_timestamp', 'modified_date'),
2460         ):
2461             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2462                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2463                 # see http://bugs.python.org/issue1646728)
2464                 with contextlib.suppress(ValueError, OverflowError, OSError):
2465                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2466                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2467
2468         live_keys = ('is_live', 'was_live')
2469         live_status = info_dict.get('live_status')
2470         if live_status is None:
2471             for key in live_keys:
2472                 if info_dict.get(key) is False:
2473                     continue
2474                 if info_dict.get(key):
2475                     live_status = key
2476                 break
2477             if all(info_dict.get(key) is False for key in live_keys):
2478                 live_status = 'not_live'
2479         if live_status:
2480             info_dict['live_status'] = live_status
2481             for key in live_keys:
2482                 if info_dict.get(key) is None:
2483                     info_dict[key] = (live_status == key)
2484         if live_status == 'post_live':
2485             info_dict['was_live'] = True
2486
2487         # Auto generate title fields corresponding to the *_number fields when missing
2488         # in order to always have clean titles. This is very common for TV series.
2489         for field in ('chapter', 'season', 'episode'):
2490             if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2491                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2492
2493     def _raise_pending_errors(self, info):
2494         err = info.pop('__pending_error', None)
2495         if err:
2496             self.report_error(err, tb=False)
2497
2498     def sort_formats(self, info_dict):
2499         formats = self._get_formats(info_dict)
2500         formats.sort(key=FormatSorter(
2501             self, info_dict.get('_format_sort_fields') or []).calculate_preference)
2502
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single video result, select its formats and optionally download it

        The steps, in order: validate and sanitize the basic fields, complete the
        chapters, sanitize thumbnails/subtitles/formats, sort and de-duplicate the
        formats, run the pre-processors and the match filter, handle the listing
        options, select the format(s) to download and, if `download` is set,
        process each selected format (for each requested range) with `process_info`.

        @param info_dict  Info dict of a video (its '_type' must be 'video' or unset).
                          It is modified in-place
        @param download   Whether to actually process the selected formats
        @returns          The info dict, updated with the last selected format
                          (unless processing stopped early: match filter or list-only)
        @raises ExtractorError       if 'id' is missing/empty, or when no requested
                                     format is available (unless the
                                     `ignore_no_formats_error` param is set)
        @raises MaxDownloadsReached  re-raised after the 'after_video'
                                     postprocessors have run
        """
        assert info_dict.get('_type', 'video') == 'video'
        self._num_videos += 1

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
        elif not info_dict.get('id'):
            raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            """Warn that a field had an unexpected type and is being converted"""
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            """Convert info[string_field] to str (with a warning) unless it is None or already a str"""
            field = info.get(string_field)
            if field is None or isinstance(field, str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = str(field)

        def sanitize_numeric_fields(info):
            """Convert every non-numeric value of the known numeric fields using int_or_none (with a warning)"""
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, (int, float)):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)
        # For a section of a video, the duration is the length of that section
        if info_dict.get('section_end') and info_dict.get('section_start') is not None:
            info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
        # A missing, zero or negative duration is removed;
        # the warning is only issued when the removed value was non-zero
        if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
            self.report_warning('"duration" field is negative, there is an error in extractor')

        chapters = info_dict.get('chapters') or []
        # Make sure the chapters start at 0
        if chapters and chapters[0].get('start_time'):
            chapters.insert(0, {'start_time': 0})

        # Fill missing start/end times from the neighbouring chapters. The dummy chapter
        # acts as the predecessor of the first chapter (ending at 0)
        # and as the successor of the last one (starting at the video's duration)
        dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
        for idx, (prev, current, next_) in enumerate(zip(
                (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
            if current.get('start_time') is None:
                current['start_time'] = prev.get('end_time')
            if not current.get('end_time'):
                current['end_time'] = next_.get('start_time')
            if not current.get('title'):
                current['title'] = f'<Untitled Chapter {idx}>'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Prefer the explicit 'thumbnail'; else use the last entry of the sanitized 'thumbnails'
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        self._fill_common_fields(info_dict)

        # Sanitize the URLs of all subtitles/captions and guess the missing extensions
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        formats = self._get_formats(info_dict)

        # Backward compatibility with InfoExtractor._sort_formats
        field_preference = (formats or [{}])[0].pop('__sort_fields', None)
        if field_preference:
            info_dict['_format_sort_fields'] = field_preference

        # or None ensures --clean-infojson removes it
        info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        # NOTE: The two adjacent string literals are concatenated
        # before .capitalize() is applied to the whole message
        if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
            self.report_warning(
                f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
                'only images are available for download. Use --list-formats to see them'.capitalize())

        # Live videos are downloaded from the current time unless the live_from_start param is set;
        # in that case the current date and time is appended to the title
        get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
        if not get_from_start:
            info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        if info_dict.get('is_live') and formats:
            # Keep only the formats matching the requested mode (from start/from now)
            formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
            if get_from_start and not formats:
                self.raise_no_formats(info_dict, msg=(
                    '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                    'If you want to download from the current time, use --no-live-from-start'))

        def is_wellformed(f):
            """Whether the format has a usable 'url'. A bytes URL is converted to str (with a warning)"""
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats or []))

        if not formats:
            self.raise_no_formats(info_dict)

        # Sanitize each format and fill in the fields that can be derived
        for format in formats:
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            if format.get('aspect_ratio') is None:
                format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
            # Estimate the filesize from duration and total bitrate when it is not known
            if (info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
            # The ChainMap makes the lookups fall back to info_dict for keys missing in the format
            format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))

        # This is copied to http_headers by the above _calc_headers and can now be removed
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # Sort the local `formats` list (in-place) using the info_dict's sort fields
        self.sort_formats({
            'formats': formats,
            '_format_sort_fields': info_dict.get('_format_sort_fields')
        })

        # Sanitize and group by format_id
        formats_dict = {}
        for i, format in enumerate(formats):
            if not format.get('format_id'):
                format['format_id'] = str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            formats_dict.setdefault(format['format_id'], []).append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    # Disambiguate by appending the index within the group
                    format['format_id'] = '%s-%d' % (format_id, i)
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

                if format.get('format') is None:
                    format['format'] = '{id} - {res}{note}'.format(
                        id=format['format_id'],
                        res=self.format_resolution(format),
                        note=format_field(format, 'format_note', ' (%s)'),
                    )

        # With check_formats, the formats are tested lazily, in reversed order
        if self.params.get('check_formats') is True:
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # A non-None result of _match_entry stops the processing of this video here
        if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
            return info_dict

        self.post_extract(info_dict)
        info_dict, _ = self.pre_process(info_dict, 'after_filter')

        # The pre-processors may have modified the formats
        formats = self._get_formats(info_dict)

        list_only = self.params.get('simulate') == 'list_only'
        # A format selector of '-' means that the user is asked for the selector
        interactive_format_selection = not list_only and self.format_selector == '-'
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        if self.params.get('listformats') or interactive_format_selection:
            self.list_formats(info_dict)
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict)
            return info_dict

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # In interactive mode, keep asking until a valid selector matching some format is given;
        # otherwise the loop body runs exactly once
        while True:
            if interactive_format_selection:
                req_format = input(
                    self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
                try:
                    format_selector = self.build_format_selector(req_format)
                except SyntaxError as err:
                    self.report_error(err, tb=False, is_error=False)
                    continue

            formats_to_download = list(format_selector({
                'formats': formats,
                'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
                'incomplete_formats': (
                    # All formats are video-only or
                    all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
                    # all formats are audio-only
                    or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
            }))
            if interactive_format_selection and not formats_to_download:
                self.report_error('Requested format is not available', tb=False, is_error=False)
                continue
            break

        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError(
                    'Requested format is not available. Use --list-formats for a list of available formats',
                    expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
            self.report_warning('Requested format is not available')
            # Process what we can, even without any available formats.
            formats_to_download = [{}]

        # Without a download_ranges param, a single empty range is used
        requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
        best_format, downloaded_formats = formats_to_download[-1], []
        if download:
            if best_format and requested_ranges:
                def to_screen(*msg):
                    """Print the messages (each may be a string or an iterable of strings) prefixed with the video id"""
                    self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

                to_screen(f'Downloading {len(formats_to_download)} format(s):',
                          (f['format_id'] for f in formats_to_download))
                if requested_ranges != ({}, ):
                    to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                              (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
            max_downloads_reached = False

            # Each selected format is processed once for each requested range
            for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
                new_info = self._copy_infodict(info_dict)
                new_info.update(fmt)
                offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
                end_time = offset + min(chapter.get('end_time', duration), duration)
                if chapter or offset:
                    new_info.update({
                        'section_start': offset + chapter.get('start_time', 0),
                        # duration may not be accurate. So allow deviations <1sec
                        'section_end': end_time if end_time <= offset + duration + 1 else None,
                        'section_title': chapter.get('title'),
                        'section_number': chapter.get('index'),
                    })
                downloaded_formats.append(new_info)
                try:
                    self.process_info(new_info)
                except MaxDownloadsReached:
                    # Finish the bookkeeping for this video first; re-raised below
                    max_downloads_reached = True
                self._raise_pending_errors(new_info)
                # Remove copied info
                for key, val in tuple(new_info.items()):
                    if info_dict.get(key) == val:
                        new_info.pop(key)
                if max_downloads_reached:
                    break

            # Record in the archive only if at least one download asked for it (True)
            # and none refused it (False); 'ignore' does not affect the decision
            write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
            assert write_archive.issubset({True, False, 'ignore'})
            if True in write_archive and False not in write_archive:
                self.record_download_archive(info_dict)

            info_dict['requested_downloads'] = downloaded_formats
            info_dict = self.run_all_pps('after_video', info_dict)
            if max_downloads_reached:
                raise MaxDownloadsReached()

        # We update the info dict with the selected best quality format (backwards compatibility)
        info_dict.update(best_format)
        return info_dict
2818
2819     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2820         """Select the requested subtitles and their format"""
2821         available_subs, normal_sub_langs = {}, []
2822         if normal_subtitles and self.params.get('writesubtitles'):
2823             available_subs.update(normal_subtitles)
2824             normal_sub_langs = tuple(normal_subtitles.keys())
2825         if automatic_captions and self.params.get('writeautomaticsub'):
2826             for lang, cap_info in automatic_captions.items():
2827                 if lang not in available_subs:
2828                     available_subs[lang] = cap_info
2829
2830         if not available_subs or (
2831                 not self.params.get('writesubtitles')
2832                 and not self.params.get('writeautomaticsub')):
2833             return None
2834
2835         all_sub_langs = tuple(available_subs.keys())
2836         if self.params.get('allsubtitles', False):
2837             requested_langs = all_sub_langs
2838         elif self.params.get('subtitleslangs', False):
2839             try:
2840                 requested_langs = orderedSet_from_options(
2841                     self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2842             except re.error as e:
2843                 raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}')
2844         else:
2845             requested_langs = LazyList(itertools.chain(
2846                 ['en'] if 'en' in normal_sub_langs else [],
2847                 filter(lambda f: f.startswith('en'), normal_sub_langs),
2848                 ['en'] if 'en' in all_sub_langs else [],
2849                 filter(lambda f: f.startswith('en'), all_sub_langs),
2850                 normal_sub_langs, all_sub_langs,
2851             ))[:1]
2852         if requested_langs:
2853             self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
2854
2855         formats_query = self.params.get('subtitlesformat', 'best')
2856         formats_preference = formats_query.split('/') if formats_query else []
2857         subs = {}
2858         for lang in requested_langs:
2859             formats = available_subs.get(lang)
2860             if formats is None:
2861                 self.report_warning(f'{lang} subtitles not available for {video_id}')
2862                 continue
2863             for ext in formats_preference:
2864                 if ext == 'best':
2865                     f = formats[-1]
2866                     break
2867                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2868                 if matches:
2869                     f = matches[-1]
2870                     break
2871             else:
2872                 f = formats[-1]
2873                 self.report_warning(
2874                     'No subtitle format found matching "%s" for language %s, '
2875                     'using %s' % (formats_query, lang, f['ext']))
2876             subs[lang] = f
2877         return subs
2878
2879     def _forceprint(self, key, info_dict):
2880         if info_dict is None:
2881             return
2882         info_copy = info_dict.copy()
2883         info_copy.setdefault('filename', self.prepare_filename(info_dict))
2884         if info_dict.get('requested_formats') is not None:
2885             # For RTMP URLs, also include the playpath
2886             info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2887         elif info_dict.get('url'):
2888             info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2889         info_copy['formats_table'] = self.render_formats_table(info_dict)
2890         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2891         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2892         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2893
2894         def format_tmpl(tmpl):
2895             mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
2896             if not mobj:
2897                 return tmpl
2898
2899             fmt = '%({})s'
2900             if tmpl.startswith('{'):
2901                 tmpl, fmt = f'.{tmpl}', '%({})j'
2902             if tmpl.endswith('='):
2903                 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
2904             return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
2905
2906         for tmpl in self.params['forceprint'].get(key, []):
2907             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2908
2909         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2910             filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
2911             tmpl = format_tmpl(tmpl)
2912             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2913             if self._ensure_dir_exists(filename):
2914                 with open(filename, 'a', encoding='utf-8', newline='') as f:
2915                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
2916
2917         return info_copy
2918
2919     def __forced_printings(self, info_dict, filename=None, incomplete=True):
2920         if (self.params.get('forcejson')
2921                 or self.params['forceprint'].get('video')
2922                 or self.params['print_to_file'].get('video')):
2923             self.post_extract(info_dict)
2924         if filename:
2925             info_dict['filename'] = filename
2926         info_copy = self._forceprint('video', info_dict)
2927
2928         def print_field(field, actual_field=None, optional=False):
2929             if actual_field is None:
2930                 actual_field = field
2931             if self.params.get(f'force{field}') and (
2932                     info_copy.get(field) is not None or (not optional and not incomplete)):
2933                 self.to_stdout(info_copy[actual_field])
2934
2935         print_field('title')
2936         print_field('id')
2937         print_field('url', 'urls')
2938         print_field('thumbnail', optional=True)
2939         print_field('description', optional=True)
2940         if filename:
2941             print_field('filename')
2942         if self.params.get('forceduration') and info_copy.get('duration') is not None:
2943             self.to_stdout(formatSeconds(info_copy['duration']))
2944         print_field('format')
2945
2946         if self.params.get('forcejson'):
2947             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2948
2949     def dl(self, name, info, subtitle=False, test=False):
2950         if not info.get('url'):
2951             self.raise_no_formats(info, True)
2952
2953         if test:
2954             verbose = self.params.get('verbose')
2955             params = {
2956                 'test': True,
2957                 'quiet': self.params.get('quiet') or not verbose,
2958                 'verbose': verbose,
2959                 'noprogress': not verbose,
2960                 'nopart': True,
2961                 'skip_unavailable_fragments': False,
2962                 'keep_fragments': False,
2963                 'overwrites': True,
2964                 '_no_ytdl_file': True,
2965             }
2966         else:
2967             params = self.params
2968         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2969         if not test:
2970             for ph in self._progress_hooks:
2971                 fd.add_progress_hook(ph)
2972             urls = '", "'.join(
2973                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2974                 for f in info.get('requested_formats', []) or [info])
2975             self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
2976
2977         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2978         # But it may contain objects that are not deep-copyable
2979         new_info = self._copy_infodict(info)
2980         if new_info.get('http_headers') is None:
2981             new_info['http_headers'] = self._calc_headers(new_info)
2982         return fd.download(name, new_info, subtitle)
2983
2984     def existing_file(self, filepaths, *, default_overwrite=True):
2985         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2986         if existing_files and not self.params.get('overwrites', default_overwrite):
2987             return existing_files[0]
2988
2989         for file in existing_files:
2990             self.report_file_delete(file)
2991             os.remove(file)
2992         return None
2993
2994     def process_info(self, info_dict):
2995         """Process a single resolved IE result. (Modifies it in-place)"""
2996
2997         assert info_dict.get('_type', 'video') == 'video'
2998         original_infodict = info_dict
2999
3000         if 'format' not in info_dict and 'ext' in info_dict:
3001             info_dict['format'] = info_dict['ext']
3002
3003         if self._match_entry(info_dict) is not None:
3004             info_dict['__write_download_archive'] = 'ignore'
3005             return
3006
3007         # Does nothing under normal operation - for backward compatibility of process_info
3008         self.post_extract(info_dict)
3009
3010         def replace_info_dict(new_info):
3011             nonlocal info_dict
3012             if new_info == info_dict:
3013                 return
3014             info_dict.clear()
3015             info_dict.update(new_info)
3016
3017         new_info, _ = self.pre_process(info_dict, 'video')
3018         replace_info_dict(new_info)
3019         self._num_downloads += 1
3020
3021         # info_dict['_filename'] needs to be set for backward compatibility
3022         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3023         temp_filename = self.prepare_filename(info_dict, 'temp')
3024         files_to_move = {}
3025
3026         # Forced printings
3027         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
3028
3029         def check_max_downloads():
3030             if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3031                 raise MaxDownloadsReached()
3032
3033         if self.params.get('simulate'):
3034             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3035             check_max_downloads()
3036             return
3037
3038         if full_filename is None:
3039             return
3040         if not self._ensure_dir_exists(encodeFilename(full_filename)):
3041             return
3042         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
3043             return
3044
3045         if self._write_description('video', info_dict,
3046                                    self.prepare_filename(info_dict, 'description')) is None:
3047             return
3048
3049         sub_files = self._write_subtitles(info_dict, temp_filename)
3050         if sub_files is None:
3051             return
3052         files_to_move.update(dict(sub_files))
3053
3054         thumb_files = self._write_thumbnails(
3055             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3056         if thumb_files is None:
3057             return
3058         files_to_move.update(dict(thumb_files))
3059
3060         infofn = self.prepare_filename(info_dict, 'infojson')
3061         _infojson_written = self._write_info_json('video', info_dict, infofn)
3062         if _infojson_written:
3063             info_dict['infojson_filename'] = infofn
3064             # For backward compatibility, even though it was a private field
3065             info_dict['__infojson_filename'] = infofn
3066         elif _infojson_written is None:
3067             return
3068
3069         # Note: Annotations are deprecated
3070         annofn = None
3071         if self.params.get('writeannotations', False):
3072             annofn = self.prepare_filename(info_dict, 'annotation')
3073         if annofn:
3074             if not self._ensure_dir_exists(encodeFilename(annofn)):
3075                 return
3076             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
3077                 self.to_screen('[info] Video annotations are already present')
3078             elif not info_dict.get('annotations'):
3079                 self.report_warning('There are no annotations to write.')
3080             else:
3081                 try:
3082                     self.to_screen('[info] Writing video annotations to: ' + annofn)
3083                     with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
3084                         annofile.write(info_dict['annotations'])
3085                 except (KeyError, TypeError):
3086                     self.report_warning('There are no annotations to write.')
3087                 except OSError:
3088                     self.report_error('Cannot write annotations file: ' + annofn)
3089                     return
3090
3091         # Write internet shortcut files
3092         def _write_link_file(link_type):
3093             url = try_get(info_dict['webpage_url'], iri_to_uri)
3094             if not url:
3095                 self.report_warning(
3096                     f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3097                 return True
3098             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3099             if not self._ensure_dir_exists(encodeFilename(linkfn)):
3100                 return False
3101             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3102                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3103                 return True
3104             try:
3105                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3106                 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3107                           newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3108                     template_vars = {'url': url}
3109                     if link_type == 'desktop':
3110                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3111                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3112             except OSError:
3113                 self.report_error(f'Cannot write internet shortcut {linkfn}')
3114                 return False
3115             return True
3116
3117         write_links = {
3118             'url': self.params.get('writeurllink'),
3119             'webloc': self.params.get('writewebloclink'),
3120             'desktop': self.params.get('writedesktoplink'),
3121         }
3122         if self.params.get('writelink'):
3123             link_type = ('webloc' if sys.platform == 'darwin'
3124                          else 'desktop' if sys.platform.startswith('linux')
3125                          else 'url')
3126             write_links[link_type] = True
3127
3128         if any(should_write and not _write_link_file(link_type)
3129                for link_type, should_write in write_links.items()):
3130             return
3131
3132         new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3133         replace_info_dict(new_info)
3134
3135         if self.params.get('skip_download'):
3136             info_dict['filepath'] = temp_filename
3137             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3138             info_dict['__files_to_move'] = files_to_move
3139             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3140             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3141         else:
3142             # Download
3143             info_dict.setdefault('__postprocessors', [])
3144             try:
3145
3146                 def existing_video_file(*filepaths):
3147                     ext = info_dict.get('ext')
3148                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3149                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3150                                               default_overwrite=False)
3151                     if file:
3152                         info_dict['ext'] = os.path.splitext(file)[1][1:]
3153                     return file
3154
3155                 fd, success = None, True
3156                 if info_dict.get('protocol') or info_dict.get('url'):
3157                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3158                     if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3159                             info_dict.get('section_start') or info_dict.get('section_end')):
3160                         msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3161                                else 'You have requested downloading the video partially, but ffmpeg is not installed')
3162                         self.report_error(f'{msg}. Aborting')
3163                         return
3164
3165                 if info_dict.get('requested_formats') is not None:
3166                     requested_formats = info_dict['requested_formats']
3167                     old_ext = info_dict['ext']
3168                     if self.params.get('merge_output_format') is None:
3169                         if (info_dict['ext'] == 'webm'
3170                                 and info_dict.get('thumbnails')
3171                                 # check with type instead of pp_key, __name__, or isinstance
3172                                 # since we dont want any custom PPs to trigger this
3173                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
3174                             info_dict['ext'] = 'mkv'
3175                             self.report_warning(
3176                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3177                     new_ext = info_dict['ext']
3178
3179                     def correct_ext(filename, ext=new_ext):
3180                         if filename == '-':
3181                             return filename
3182                         filename_real_ext = os.path.splitext(filename)[1][1:]
3183                         filename_wo_ext = (
3184                             os.path.splitext(filename)[0]
3185                             if filename_real_ext in (old_ext, new_ext)
3186                             else filename)
3187                         return f'{filename_wo_ext}.{ext}'
3188
3189                     # Ensure filename always has a correct extension for successful merge
3190                     full_filename = correct_ext(full_filename)
3191                     temp_filename = correct_ext(temp_filename)
3192                     dl_filename = existing_video_file(full_filename, temp_filename)
3193                     info_dict['__real_download'] = False
3194
3195                     merger = FFmpegMergerPP(self)
3196                     downloaded = []
3197                     if dl_filename is not None:
3198                         self.report_file_already_downloaded(dl_filename)
3199                     elif fd:
3200                         for f in requested_formats if fd != FFmpegFD else []:
3201                             f['filepath'] = fname = prepend_extension(
3202                                 correct_ext(temp_filename, info_dict['ext']),
3203                                 'f%s' % f['format_id'], info_dict['ext'])
3204                             downloaded.append(fname)
3205                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3206                         success, real_download = self.dl(temp_filename, info_dict)
3207                         info_dict['__real_download'] = real_download
3208                     else:
3209                         if self.params.get('allow_unplayable_formats'):
3210                             self.report_warning(
3211                                 'You have requested merging of multiple formats '
3212                                 'while also allowing unplayable formats to be downloaded. '
3213                                 'The formats won\'t be merged to prevent data corruption.')
3214                         elif not merger.available:
3215                             msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3216                             if not self.params.get('ignoreerrors'):
3217                                 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3218                                 return
3219                             self.report_warning(f'{msg}. The formats won\'t be merged')
3220
3221                         if temp_filename == '-':
3222                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3223                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3224                                       else 'but ffmpeg is not installed')
3225                             self.report_warning(
3226                                 f'You have requested downloading multiple formats to stdout {reason}. '
3227                                 'The formats will be streamed one after the other')
3228                             fname = temp_filename
3229                         for f in requested_formats:
3230                             new_info = dict(info_dict)
3231                             del new_info['requested_formats']
3232                             new_info.update(f)
3233                             if temp_filename != '-':
3234                                 fname = prepend_extension(
3235                                     correct_ext(temp_filename, new_info['ext']),
3236                                     'f%s' % f['format_id'], new_info['ext'])
3237                                 if not self._ensure_dir_exists(fname):
3238                                     return
3239                                 f['filepath'] = fname
3240                                 downloaded.append(fname)
3241                             partial_success, real_download = self.dl(fname, new_info)
3242                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3243                             success = success and partial_success
3244
3245                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3246                         info_dict['__postprocessors'].append(merger)
3247                         info_dict['__files_to_merge'] = downloaded
3248                         # Even if there were no downloads, it is being merged only now
3249                         info_dict['__real_download'] = True
3250                     else:
3251                         for file in downloaded:
3252                             files_to_move[file] = None
3253                 else:
3254                     # Just a single file
3255                     dl_filename = existing_video_file(full_filename, temp_filename)
3256                     if dl_filename is None or dl_filename == temp_filename:
3257                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3258                         # So we should try to resume the download
3259                         success, real_download = self.dl(temp_filename, info_dict)
3260                         info_dict['__real_download'] = real_download
3261                     else:
3262                         self.report_file_already_downloaded(dl_filename)
3263
3264                 dl_filename = dl_filename or temp_filename
3265                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3266
3267             except network_exceptions as err:
3268                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3269                 return
3270             except OSError as err:
3271                 raise UnavailableVideoError(err)
3272             except (ContentTooShortError, ) as err:
3273                 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3274                 return
3275
3276             self._raise_pending_errors(info_dict)
3277             if success and full_filename != '-':
3278
3279                 def fixup():
3280                     do_fixup = True
3281                     fixup_policy = self.params.get('fixup')
3282                     vid = info_dict['id']
3283
3284                     if fixup_policy in ('ignore', 'never'):
3285                         return
3286                     elif fixup_policy == 'warn':
3287                         do_fixup = 'warn'
3288                     elif fixup_policy != 'force':
3289                         assert fixup_policy in ('detect_or_warn', None)
3290                         if not info_dict.get('__real_download'):
3291                             do_fixup = False
3292
3293                     def ffmpeg_fixup(cndn, msg, cls):
3294                         if not (do_fixup and cndn):
3295                             return
3296                         elif do_fixup == 'warn':
3297                             self.report_warning(f'{vid}: {msg}')
3298                             return
3299                         pp = cls(self)
3300                         if pp.available:
3301                             info_dict['__postprocessors'].append(pp)
3302                         else:
3303                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3304
3305                     stretched_ratio = info_dict.get('stretched_ratio')
3306                     ffmpeg_fixup(stretched_ratio not in (1, None),
3307                                  f'Non-uniform pixel ratio {stretched_ratio}',
3308                                  FFmpegFixupStretchedPP)
3309
3310                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3311                     downloader = downloader.FD_NAME if downloader else None
3312
3313                     ext = info_dict.get('ext')
3314                     postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3315                         isinstance(pp, FFmpegVideoConvertorPP)
3316                         and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3317                     ) for pp in self._pps['post_process'])
3318
3319                     if not postprocessed_by_ffmpeg:
3320                         ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
3321                                      'writing DASH m4a. Only some players support this container',
3322                                      FFmpegFixupM4aPP)
3323                         ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3324                                      or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3325                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3326                                      FFmpegFixupM3u8PP)
3327                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
3328                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3329
3330                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3331                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3332
3333                 fixup()
3334                 try:
3335                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3336                 except PostProcessingError as err:
3337                     self.report_error('Postprocessing: %s' % str(err))
3338                     return
3339                 try:
3340                     for ph in self._post_hooks:
3341                         ph(info_dict['filepath'])
3342                 except Exception as err:
3343                     self.report_error('post hooks: %s' % str(err))
3344                     return
3345                 info_dict['__write_download_archive'] = True
3346
3347         assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
3348         if self.params.get('force_write_download_archive'):
3349             info_dict['__write_download_archive'] = True
3350         check_max_downloads()
3351
3352     def __download_wrapper(self, func):
3353         @functools.wraps(func)
3354         def wrapper(*args, **kwargs):
3355             try:
3356                 res = func(*args, **kwargs)
3357             except UnavailableVideoError as e:
3358                 self.report_error(e)
3359             except DownloadCancelled as e:
3360                 self.to_screen(f'[info] {e}')
3361                 if not self.params.get('break_per_url'):
3362                     raise
3363                 self._num_downloads = 0
3364             else:
3365                 if self.params.get('dump_single_json', False):
3366                     self.post_extract(res)
3367                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3368         return wrapper
3369
3370     def download(self, url_list):
3371         """Download a given list of URLs."""
3372         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3373         outtmpl = self.params['outtmpl']['default']
3374         if (len(url_list) > 1
3375                 and outtmpl != '-'
3376                 and '%' not in outtmpl
3377                 and self.params.get('max_downloads') != 1):
3378             raise SameFileError(outtmpl)
3379
3380         for url in url_list:
3381             self.__download_wrapper(self.extract_info)(
3382                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3383
3384         return self._download_retcode
3385
3386     def download_with_info_file(self, info_filename):
3387         with contextlib.closing(fileinput.FileInput(
3388                 [info_filename], mode='r',
3389                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3390             # FileInput doesn't have a read method, we can't call json.load
3391             infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
3392                      for info in variadic(json.loads('\n'.join(f)))]
3393         for info in infos:
3394             try:
3395                 self.__download_wrapper(self.process_ie_result)(info, download=True)
3396             except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3397                 if not isinstance(e, EntryNotInPlaylist):
3398                     self.to_stderr('\r')
3399                 webpage_url = info.get('webpage_url')
3400                 if webpage_url is None:
3401                     raise
3402                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3403                 self.download([webpage_url])
3404         return self._download_retcode
3405
3406     @staticmethod
3407     def sanitize_info(info_dict, remove_private_keys=False):
3408         ''' Sanitize the infodict for converting to json '''
3409         if info_dict is None:
3410             return info_dict
3411         info_dict.setdefault('epoch', int(time.time()))
3412         info_dict.setdefault('_type', 'video')
3413         info_dict.setdefault('_version', {
3414             'version': __version__,
3415             'current_git_head': current_git_head(),
3416             'release_git_head': RELEASE_GIT_HEAD,
3417             'repository': REPOSITORY,
3418         })
3419
3420         if remove_private_keys:
3421             reject = lambda k, v: v is None or k.startswith('__') or k in {
3422                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3423                 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3424                 'playlist_autonumber', '_format_sort_fields',
3425             }
3426         else:
3427             reject = lambda k, v: False
3428
3429         def filter_fn(obj):
3430             if isinstance(obj, dict):
3431                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3432             elif isinstance(obj, (list, tuple, set, LazyList)):
3433                 return list(map(filter_fn, obj))
3434             elif obj is None or isinstance(obj, (str, int, float, bool)):
3435                 return obj
3436             else:
3437                 return repr(obj)
3438
3439         return filter_fn(info_dict)
3440
3441     @staticmethod
3442     def filter_requested_info(info_dict, actually_filter=True):
3443         ''' Alias of sanitize_info for backward compatibility '''
3444         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3445
3446     def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3447         for filename in set(filter(None, files_to_delete)):
3448             if msg:
3449                 self.to_screen(msg % filename)
3450             try:
3451                 os.remove(filename)
3452             except OSError:
3453                 self.report_warning(f'Unable to delete file {filename}')
3454             if filename in info.get('__files_to_move', []):  # NB: Delete even if None
3455                 del info['__files_to_move'][filename]
3456
3457     @staticmethod
3458     def post_extract(info_dict):
3459         def actual_post_extract(info_dict):
3460             if info_dict.get('_type') in ('playlist', 'multi_video'):
3461                 for video_dict in info_dict.get('entries', {}):
3462                     actual_post_extract(video_dict or {})
3463                 return
3464
3465             post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3466             info_dict.update(post_extractor())
3467
3468         actual_post_extract(info_dict or {})
3469
3470     def run_pp(self, pp, infodict):
3471         files_to_delete = []
3472         if '__files_to_move' not in infodict:
3473             infodict['__files_to_move'] = {}
3474         try:
3475             files_to_delete, infodict = pp.run(infodict)
3476         except PostProcessingError as e:
3477             # Must be True and not 'only_download'
3478             if self.params.get('ignoreerrors') is True:
3479                 self.report_error(e)
3480                 return infodict
3481             raise
3482
3483         if not files_to_delete:
3484             return infodict
3485         if self.params.get('keepvideo', False):
3486             for f in files_to_delete:
3487                 infodict['__files_to_move'].setdefault(f, '')
3488         else:
3489             self._delete_downloaded_files(
3490                 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3491         return infodict
3492
3493     def run_all_pps(self, key, info, *, additional_pps=None):
3494         if key != 'video':
3495             self._forceprint(key, info)
3496         for pp in (additional_pps or []) + self._pps[key]:
3497             info = self.run_pp(pp, info)
3498         return info
3499
3500     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3501         info = dict(ie_info)
3502         info['__files_to_move'] = files_to_move or {}
3503         try:
3504             info = self.run_all_pps(key, info)
3505         except PostProcessingError as err:
3506             msg = f'Preprocessing: {err}'
3507             info.setdefault('__pending_error', msg)
3508             self.report_error(msg, is_error=False)
3509         return info, info.pop('__files_to_move', None)
3510
3511     def post_process(self, filename, info, files_to_move=None):
3512         """Run all the postprocessors on the given file."""
3513         info['filepath'] = filename
3514         info['__files_to_move'] = files_to_move or {}
3515         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3516         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3517         del info['__files_to_move']
3518         return self.run_all_pps('after_move', info)
3519
3520     def _make_archive_id(self, info_dict):
3521         video_id = info_dict.get('id')
3522         if not video_id:
3523             return
3524         # Future-proof against any change in case
3525         # and backwards compatibility with prior versions
3526         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3527         if extractor is None:
3528             url = str_or_none(info_dict.get('url'))
3529             if not url:
3530                 return
3531             # Try to find matching extractor for the URL and take its ie_key
3532             for ie_key, ie in self._ies.items():
3533                 if ie.suitable(url):
3534                     extractor = ie_key
3535                     break
3536             else:
3537                 return
3538         return make_archive_id(extractor, video_id)
3539
3540     def in_download_archive(self, info_dict):
3541         if not self.archive:
3542             return False
3543
3544         vid_ids = [self._make_archive_id(info_dict)]
3545         vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3546         return any(id_ in self.archive for id_ in vid_ids)
3547
3548     def record_download_archive(self, info_dict):
3549         fn = self.params.get('download_archive')
3550         if fn is None:
3551             return
3552         vid_id = self._make_archive_id(info_dict)
3553         assert vid_id
3554
3555         self.write_debug(f'Adding to archive: {vid_id}')
3556         if is_path_like(fn):
3557             with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3558                 archive_file.write(vid_id + '\n')
3559         self.archive.add(vid_id)
3560
3561     @staticmethod
3562     def format_resolution(format, default='unknown'):
3563         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3564             return 'audio only'
3565         if format.get('resolution') is not None:
3566             return format['resolution']
3567         if format.get('width') and format.get('height'):
3568             return '%dx%d' % (format['width'], format['height'])
3569         elif format.get('height'):
3570             return '%sp' % format['height']
3571         elif format.get('width'):
3572             return '%dx?' % format['width']
3573         return default
3574
3575     def _list_format_headers(self, *headers):
3576         if self.params.get('listformats_table', True) is not False:
3577             return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3578         return headers
3579
3580     def _format_note(self, fdict):
3581         res = ''
3582         if fdict.get('ext') in ['f4f', 'f4m']:
3583             res += '(unsupported)'
3584         if fdict.get('language'):
3585             if res:
3586                 res += ' '
3587             res += '[%s]' % fdict['language']
3588         if fdict.get('format_note') is not None:
3589             if res:
3590                 res += ' '
3591             res += fdict['format_note']
3592         if fdict.get('tbr') is not None:
3593             if res:
3594                 res += ', '
3595             res += '%4dk' % fdict['tbr']
3596         if fdict.get('container') is not None:
3597             if res:
3598                 res += ', '
3599             res += '%s container' % fdict['container']
3600         if (fdict.get('vcodec') is not None
3601                 and fdict.get('vcodec') != 'none'):
3602             if res:
3603                 res += ', '
3604             res += fdict['vcodec']
3605             if fdict.get('vbr') is not None:
3606                 res += '@'
3607         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3608             res += 'video@'
3609         if fdict.get('vbr') is not None:
3610             res += '%4dk' % fdict['vbr']
3611         if fdict.get('fps') is not None:
3612             if res:
3613                 res += ', '
3614             res += '%sfps' % fdict['fps']
3615         if fdict.get('acodec') is not None:
3616             if res:
3617                 res += ', '
3618             if fdict['acodec'] == 'none':
3619                 res += 'video only'
3620             else:
3621                 res += '%-5s' % fdict['acodec']
3622         elif fdict.get('abr') is not None:
3623             if res:
3624                 res += ', '
3625             res += 'audio'
3626         if fdict.get('abr') is not None:
3627             res += '@%3dk' % fdict['abr']
3628         if fdict.get('asr') is not None:
3629             res += ' (%5dHz)' % fdict['asr']
3630         if fdict.get('filesize') is not None:
3631             if res:
3632                 res += ', '
3633             res += format_bytes(fdict['filesize'])
3634         elif fdict.get('filesize_approx') is not None:
3635             if res:
3636                 res += ', '
3637             res += '~' + format_bytes(fdict['filesize_approx'])
3638         return res
3639
3640     def _get_formats(self, info_dict):
3641         if info_dict.get('formats') is None:
3642             if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3643                 return [info_dict]
3644             return []
3645         return info_dict['formats']
3646
3647     def render_formats_table(self, info_dict):
3648         formats = self._get_formats(info_dict)
3649         if not formats:
3650             return
3651         if not self.params.get('listformats_table', True) is not False:
3652             table = [
3653                 [
3654                     format_field(f, 'format_id'),
3655                     format_field(f, 'ext'),
3656                     self.format_resolution(f),
3657                     self._format_note(f)
3658                 ] for f in formats if (f.get('preference') or 0) >= -1000]
3659             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3660
3661         def simplified_codec(f, field):
3662             assert field in ('acodec', 'vcodec')
3663             codec = f.get(field, 'unknown')
3664             if not codec:
3665                 return 'unknown'
3666             elif codec != 'none':
3667                 return '.'.join(codec.split('.')[:4])
3668
3669             if field == 'vcodec' and f.get('acodec') == 'none':
3670                 return 'images'
3671             elif field == 'acodec' and f.get('vcodec') == 'none':
3672                 return ''
3673             return self._format_out('audio only' if field == 'vcodec' else 'video only',
3674                                     self.Styles.SUPPRESS)
3675
3676         delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3677         table = [
3678             [
3679                 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3680                 format_field(f, 'ext'),
3681                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3682                 format_field(f, 'fps', '\t%d', func=round),
3683                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3684                 format_field(f, 'audio_channels', '\t%s'),
3685                 delim,
3686                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3687                 format_field(f, 'tbr', '\t%dk', func=round),
3688                 shorten_protocol_name(f.get('protocol', '')),
3689                 delim,
3690                 simplified_codec(f, 'vcodec'),
3691                 format_field(f, 'vbr', '\t%dk', func=round),
3692                 simplified_codec(f, 'acodec'),
3693                 format_field(f, 'abr', '\t%dk', func=round),
3694                 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3695                 join_nonempty(
3696                     self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3697                     self._format_out('DRM', 'light red') if f.get('has_drm') else None,
3698                     format_field(f, 'language', '[%s]'),
3699                     join_nonempty(format_field(f, 'format_note'),
3700                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3701                                   delim=', '),
3702                     delim=' '),
3703             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3704         header_line = self._list_format_headers(
3705             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3706             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3707
3708         return render_table(
3709             header_line, table, hide_empty=True,
3710             delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3711
3712     def render_thumbnails_table(self, info_dict):
3713         thumbnails = list(info_dict.get('thumbnails') or [])
3714         if not thumbnails:
3715             return None
3716         return render_table(
3717             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3718             [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3719
3720     def render_subtitles_table(self, video_id, subtitles):
3721         def _row(lang, formats):
3722             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3723             if len(set(names)) == 1:
3724                 names = [] if names[0] == 'unknown' else names[:1]
3725             return [lang, ', '.join(names), ', '.join(exts)]
3726
3727         if not subtitles:
3728             return None
3729         return render_table(
3730             self._list_format_headers('Language', 'Name', 'Formats'),
3731             [_row(lang, formats) for lang, formats in subtitles.items()],
3732             hide_empty=True)
3733
3734     def __list_table(self, video_id, name, func, *args):
3735         table = func(*args)
3736         if not table:
3737             self.to_screen(f'{video_id} has no {name}')
3738             return
3739         self.to_screen(f'[info] Available {name} for {video_id}:')
3740         self.to_stdout(table)
3741
3742     def list_formats(self, info_dict):
3743         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3744
3745     def list_thumbnails(self, info_dict):
3746         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3747
3748     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3749         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3750
3751     def urlopen(self, req):
3752         """ Start an HTTP download """
3753         if isinstance(req, str):
3754             req = sanitized_Request(req)
3755         return self._opener.open(req, timeout=self._socket_timeout)
3756
    def print_debug_header(self):
        """Write the verbose-mode debug header.

        Does nothing unless the 'verbose' param is truthy. Otherwise emits a
        series of '[debug] ...' lines: encodings, version/variant, params (API
        use only), lazy-extractor state, compat options, git HEAD, system
        identifier, external executable versions, optional libraries, the
        proxy map, loaded plugins and plugin directories.

        Lines go to the 'logger' param's debug() when a logger is configured,
        otherwise through write_string / self._write_string.
        """
        if not self.params.get('verbose'):
            return

        from . import _IN_CLI  # Must be delayed import

        # These imports can be slow. So import them only as needed
        from .extractor.extractors import _LAZY_LOADER
        from .extractor.extractors import (
            _PLUGIN_CLASSES as plugin_ies,
            _PLUGIN_OVERRIDES as plugin_ie_overrides
        )

        def get_encoding(stream):
            # Describe a stream's encoding; a stream without an `encoding`
            # attribute is reported as 'missing (<type name>)'
            ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
            if not supports_terminal_sequences(stream):
                from .utils import WINDOWS_VT_MODE  # Must be imported locally
                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
            return ret

        # One entry per configured output stream, skipping unset streams and 'console'
        encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            self.get_encoding(),
            ', '.join(
                f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
                if stream is not None and key != 'console')
        )

        # Pick the sink for every following line. Without a logger, the
        # encodings line is written via write_string(..., encoding=None) while
        # all later lines go through self._write_string
        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        # Version line; the detected variant gets a '*' suffix when VARIANT is
        # set to anything other than 'pip'
        source = detect_variant()
        if VARIANT not in (None, 'pip'):
            source += '*'
        klass = type(self)
        write_debug(join_nonempty(
            f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
            f'{CHANNEL}@{__version__}',
            f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
            delim=' '))

        # The full params are only dumped when not running from the CLI
        if not _IN_CLI:
            write_debug(f'params: {self.params}')

        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if self.params['compat_opts']:
            write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

        if current_git_head():
            write_debug(f'Git HEAD: {current_git_head()}')
        write_debug(system_identifier())

        # External executables; enabled ffmpeg features are appended to its version
        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        # Only executables with a truthy version are listed
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .compat.compat_utils import get_package_info
        from .dependencies import available_dependencies

        write_debug('Optional libraries: %s' % (', '.join(sorted({
            join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
        })) or 'none'))

        # Make sure the opener exists, then merge the `proxies` mapping of
        # every handler that has one
        self._setup_opener()
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        # Loaded plugins; a plugin registered under a different name is shown
        # as '<class> as <name>'. NOTE: the loop variable `klass` below rebinds
        # the `klass` assigned earlier in this method
        for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
            display_list = ['%s%s' % (
                klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in plugins.items()]
            if plugin_type == 'Extractor':
                # Extractor overrides: the part of the last override's IE_NAME
                # after '+', followed by the overridden parent class name
                display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                                    for parent, plugins in plugin_ie_overrides.items())
            if not display_list:
                continue
            write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

        plugin_dirs = plugin_directories()
        if plugin_dirs:
            write_debug(f'Plugin directories: {plugin_dirs}')

        # Not implemented
        # (the `False and` guard makes the whole branch below unreachable)
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode()
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3873
3874     def _setup_opener(self):
3875         if hasattr(self, '_opener'):
3876             return
3877         timeout_val = self.params.get('socket_timeout')
3878         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3879
3880         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3881         opts_cookiefile = self.params.get('cookiefile')
3882         opts_proxy = self.params.get('proxy')
3883
3884         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3885
3886         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3887         if opts_proxy is not None:
3888             if opts_proxy == '':
3889                 proxies = {}
3890             else:
3891                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3892         else:
3893             proxies = urllib.request.getproxies()
3894             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3895             if 'http' in proxies and 'https' not in proxies:
3896                 proxies['https'] = proxies['http']
3897         proxy_handler = PerRequestProxyHandler(proxies)
3898
3899         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3900         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3901         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3902         redirect_handler = YoutubeDLRedirectHandler()
3903         data_handler = urllib.request.DataHandler()
3904
3905         # When passing our own FileHandler instance, build_opener won't add the
3906         # default FileHandler and allows us to disable the file protocol, which
3907         # can be used for malicious purposes (see
3908         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3909         file_handler = urllib.request.FileHandler()
3910
3911         if not self.params.get('enable_file_urls'):
3912             def file_open(*args, **kwargs):
3913                 raise urllib.error.URLError(
3914                     'file:// URLs are explicitly disabled in yt-dlp for security reasons. '
3915                     'Use --enable-file-urls to enable at your own risk.')
3916             file_handler.file_open = file_open
3917
3918         opener = urllib.request.build_opener(
3919             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3920
3921         # Delete the default user-agent header, which would otherwise apply in
3922         # cases where our custom HTTP handler doesn't come into play
3923         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3924         opener.addheaders = []
3925         self._opener = opener
3926
3927     def encode(self, s):
3928         if isinstance(s, bytes):
3929             return s  # Already encoded
3930
3931         try:
3932             return s.encode(self.get_encoding())
3933         except UnicodeEncodeError as err:
3934             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3935             raise
3936
3937     def get_encoding(self):
3938         encoding = self.params.get('encoding')
3939         if encoding is None:
3940             encoding = preferredencoding()
3941         return encoding
3942
3943     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3944         ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
3945         if overwrite is None:
3946             overwrite = self.params.get('overwrites', True)
3947         if not self.params.get('writeinfojson'):
3948             return False
3949         elif not infofn:
3950             self.write_debug(f'Skipping writing {label} infojson')
3951             return False
3952         elif not self._ensure_dir_exists(infofn):
3953             return None
3954         elif not overwrite and os.path.exists(infofn):
3955             self.to_screen(f'[info] {label.title()} metadata is already present')
3956             return 'exists'
3957
3958         self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3959         try:
3960             write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3961             return True
3962         except OSError:
3963             self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3964             return None
3965
3966     def _write_description(self, label, ie_result, descfn):
3967         ''' Write description and returns True = written, False = skip, None = error '''
3968         if not self.params.get('writedescription'):
3969             return False
3970         elif not descfn:
3971             self.write_debug(f'Skipping writing {label} description')
3972             return False
3973         elif not self._ensure_dir_exists(descfn):
3974             return None
3975         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3976             self.to_screen(f'[info] {label.title()} description is already present')
3977         elif ie_result.get('description') is None:
3978             self.to_screen(f'[info] There\'s no {label} description to write')
3979             return False
3980         else:
3981             try:
3982                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3983                 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3984                     descfile.write(ie_result['description'])
3985             except OSError:
3986                 self.report_error(f'Cannot write {label} description file {descfn}')
3987                 return None
3988         return True
3989
3990     def _write_subtitles(self, info_dict, filename):
3991         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3992         ret = []
3993         subtitles = info_dict.get('requested_subtitles')
3994         if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3995             # subtitles download errors are already managed as troubles in relevant IE
3996             # that way it will silently go on when used with unsupporting IE
3997             return ret
3998         elif not subtitles:
3999             self.to_screen('[info] There are no subtitles for the requested languages')
4000             return ret
4001         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
4002         if not sub_filename_base:
4003             self.to_screen('[info] Skipping writing video subtitles')
4004             return ret
4005
4006         for sub_lang, sub_info in subtitles.items():
4007             sub_format = sub_info['ext']
4008             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
4009             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
4010             existing_sub = self.existing_file((sub_filename_final, sub_filename))
4011             if existing_sub:
4012                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
4013                 sub_info['filepath'] = existing_sub
4014                 ret.append((existing_sub, sub_filename_final))
4015                 continue
4016
4017             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
4018             if sub_info.get('data') is not None:
4019                 try:
4020                     # Use newline='' to prevent conversion of newline characters
4021                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
4022                     with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
4023                         subfile.write(sub_info['data'])
4024                     sub_info['filepath'] = sub_filename
4025                     ret.append((sub_filename, sub_filename_final))
4026                     continue
4027                 except OSError:
4028                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
4029                     return None
4030
4031             try:
4032                 sub_copy = sub_info.copy()
4033                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
4034                 self.dl(sub_filename, sub_copy, subtitle=True)
4035                 sub_info['filepath'] = sub_filename
4036                 ret.append((sub_filename, sub_filename_final))
4037             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
4038                 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
4039                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
4040                     if not self.params.get('ignoreerrors'):
4041                         self.report_error(msg)
4042                     raise DownloadError(msg)
4043                 self.report_warning(msg)
4044         return ret
4045
4046     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
4047         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
4048         write_all = self.params.get('write_all_thumbnails', False)
4049         thumbnails, ret = [], []
4050         if write_all or self.params.get('writethumbnail', False):
4051             thumbnails = info_dict.get('thumbnails') or []
4052             if not thumbnails:
4053                 self.to_screen(f'[info] There are no {label} thumbnails to download')
4054                 return ret
4055         multiple = write_all and len(thumbnails) > 1
4056
4057         if thumb_filename_base is None:
4058             thumb_filename_base = filename
4059         if thumbnails and not thumb_filename_base:
4060             self.write_debug(f'Skipping writing {label} thumbnail')
4061             return ret
4062
4063         for idx, t in list(enumerate(thumbnails))[::-1]:
4064             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
4065             thumb_display_id = f'{label} thumbnail {t["id"]}'
4066             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4067             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
4068
4069             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4070             if existing_thumb:
4071                 self.to_screen('[info] %s is already present' % (
4072                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
4073                 t['filepath'] = existing_thumb
4074                 ret.append((existing_thumb, thumb_filename_final))
4075             else:
4076                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
4077                 try:
4078                     uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
4079                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
4080                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
4081                         shutil.copyfileobj(uf, thumbf)
4082                     ret.append((thumb_filename, thumb_filename_final))
4083                     t['filepath'] = thumb_filename
4084                 except network_exceptions as err:
4085                     thumbnails.pop(idx)
4086                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
4087             if ret and not write_all:
4088                 break
4089         return ret