import collections
import contextlib
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import random
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata
import urllib.request
from string import ascii_letters

from .cache import Cache
from .compat import HAS_LEGACY as compat_has_legacy
from .compat import compat_os_name, compat_shlex_quote
from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    IDENTITY,
    LINK_TEMPLATES,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    GeoRestrictedError,
    HEADRequest,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PerRequestProxyHandler,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    age_restricted,
    args_to_str,
    bug_reports_message,
    date_from_str,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    escapeHTML,
    expand_path,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_domain,
    int_or_none,
    iri_to_uri,
    join_nonempty,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    merge_headers,
    network_exceptions,
    number_of_digits,
    orderedSet,
    parse_filesize,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    system_identifier,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .version import RELEASE_GIT_HEAD, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration". (A commented
    usage sketch follows this docstring.)

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. see "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. see "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of same quality.
    allow_multiple_video_streams:   Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams:   Allow multiple audio streams to be merged
                       into a single file
    check_formats      Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home'
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails:  Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages. Eg: ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser:  A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, and the name of the
                       keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy:  URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                         the entries of utils.POSTPROCESS_WHEN
                         Assumed to be 'post_process' if not given
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                         Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                         None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                         unknown
                       * fragment_index: The counter of the currently
                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    postprocessor_hooks:  A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                         Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: Extension to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval:Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
    download_ranges:   A function that gets called for every video with the signature
                       (info_dict, *, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded. Each Section contains:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       Eg: {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _format_selection_exts = {
        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
        'video': {'mp4', 'flv', 'webm', '3gp'},
        'storyboards': {'mhtml'},
    }

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        windows_enable_vt_mode()
        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )
        self._allow_colors = Namespace(**{
            type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
            for type_, stream in self._out_files.items_ if type_ != 'console'
        })

        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 6), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = ('Support for Python version %d.%d has been deprecated. '
                   'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details. '
                   'You will receive only one more update on this version')
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            self.deprecation_warning(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                'If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecation_warning(msg)

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        if not compat_has_legacy:
            self.params['compat_opts'].add('no-compat-legacy')
        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        # Set http_headers defaults according to std_headers
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        self._setup_opener()
        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

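    # A hedged sketch of the archive file consumed above (file name and IDs are
    # illustrative): each line is "<extractor key, lowercased> <video id>", e.g.
    #
    #     youtube dQw4w9WgXcQ
    #     vimeo 123456
    #
    # Every line is loaded into `self.archive`, and later checked by
    # `in_download_archive()` to skip already-downloaded videos.
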
    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def _get_info_extractor_class(self, ie_key):
        ie = self._ies.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)
            self.add_info_extractor(ie)
        return ie
    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. If there is no cached
        instance, create a new one and add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine the action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may raise an exception when
        errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'DeprecationWarning: {message}')
        else:
            self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message, or print it to stderr if no logger is given'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

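    # What `escape_outtmpl` does, on illustrative input:
    #
    #     YoutubeDL.escape_outtmpl('%(title)s - 100% done')
    #     # -> '%(title)s - 100%% done'
    #
    # Valid template keys are left intact; stray '%' characters are doubled so
    # the later `% info_dict` substitution cannot misread them.
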
    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

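    # Sketch of `validate_outtmpl` behavior (templates are illustrative):
    #
    #     YoutubeDL.validate_outtmpl('%(title)s.%(ext)s')  # -> None (valid)
    #     YoutubeDL.validate_outtmpl('%(title)')           # -> ValueError instance
    #
    # Note that it returns the exception instead of raising it.
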
    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')

        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = f'0{field_size_compat_map[key]:d}d'

            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'h':  # html
                value, fmt = escapeHTML(value), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict

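    # End-to-end template evaluation, with made-up field values:
    #
    #     ydl = YoutubeDL({})
    #     ydl.evaluate_outtmpl('%(title)s-%(id)s.%(ext)s',
    #                          {'title': 'Example', 'id': 'abc123', 'ext': 'mp4'})
    #     # -> 'Example-abc123.mp4'
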
    def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
        assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
        if outtmpl is None:
            outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
        try:
            outtmpl = self._outtmpl_expandpath(outtmpl)
            filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
            if not filename:
                return None

            if tmpl_type in ('', 'temp'):
                final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
                if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                    filename = replace_extension(filename, ext, final_ext)
            elif tmpl_type:
                force_ext = OUTTMPL_TYPES[tmpl_type]
                if force_ext:
                    filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                no_ext, *ext = filename.rsplit('.', 2)
                filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
        """Generate the output filename"""
        if outtmpl:
            assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
            dir_type = None
        filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)

    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is NO_DEFAULT:
                    while True:
                        filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                        reply = input(self._format_screen(
                            f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                        if reply in {'y', ''}:
                            return None
                        elif reply == 'n':
                            return f'Skipping {video_title}'
                elif ret is not None:
                    return ret
            return None

        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason

1369 @staticmethod
1370 def add_extra_info(info_dict, extra_info):
1371 '''Set the keys from extra_info in info dict if they are missing'''
1372 for key, value in extra_info.items():
1373 info_dict.setdefault(key, value)
1374
1375 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1376 process=True, force_generic_extractor=False):
1377 """
1378 Extract and return the information dictionary for the given URL.
1379
1380 Arguments:
1381 url -- URL to extract
1382
1383 Keyword arguments:
1384 download -- whether to download videos during extraction
1385 ie_key -- extractor key hint
1386 extra_info -- dictionary containing the extra values to add to each result
1387 process -- whether to resolve all unresolved references (URLs, playlist items),
1388 must be True for download to work.
1389 force_generic_extractor -- force using the generic extractor
1390 """
1391
1392 if extra_info is None:
1393 extra_info = {}
1394
1395 if not ie_key and force_generic_extractor:
1396 ie_key = 'Generic'
1397
1398 if ie_key:
1399 ies = {ie_key: self._get_info_extractor_class(ie_key)}
1400 else:
1401 ies = self._ies
1402
1403 for ie_key, ie in ies.items():
1404 if not ie.suitable(url):
1405 continue
1406
1407 if not ie.working():
1408 self.report_warning('The program functionality for this site has been marked as broken, '
1409 'and will probably not work.')
1410
1411 temp_id = ie.get_temp_id(url)
1412 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1413 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1414 if self.params.get('break_on_existing', False):
1415 raise ExistingVideoReached()
1416 break
1417 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1418 else:
1419 self.report_error('no suitable InfoExtractor for URL %s' % url)
1420
1421 def _handle_extraction_exceptions(func):
1422 @functools.wraps(func)
1423 def wrapper(self, *args, **kwargs):
1424 while True:
1425 try:
1426 return func(self, *args, **kwargs)
1427 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1428 raise
1429 except ReExtractInfo as e:
1430 if e.expected:
1431 self.to_screen(f'{e}; Re-extracting data')
1432 else:
1433 self.to_stderr('\r')
1434 self.report_warning(f'{e}; Re-extracting data')
1435 continue
1436 except GeoRestrictedError as e:
1437 msg = e.msg
1438 if e.countries:
1439 msg += '\nThis video is available in %s.' % ', '.join(
1440 map(ISO3166Utils.short2full, e.countries))
1441 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1442 self.report_error(msg)
1443 except ExtractorError as e: # An error we somewhat expected
1444 self.report_error(str(e), e.format_traceback())
1445 except Exception as e:
1446 if self.params.get('ignoreerrors'):
1447 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1448 else:
1449 raise
1450 break
1451 return wrapper
1452
1453 def _wait_for_video(self, ie_result):
1454 if (not self.params.get('wait_for_video')
1455 or ie_result.get('_type', 'video') != 'video'
1456 or ie_result.get('formats') or ie_result.get('url')):
1457 return
1458
1459 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1460 last_msg = ''
1461
1462 def progress(msg):
1463 nonlocal last_msg
1464 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1465 last_msg = msg
1466
1467 min_wait, max_wait = self.params.get('wait_for_video')
1468 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1469 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1470 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1471 self.report_warning('Release time of video is not known')
1472 elif (diff or 0) <= 0:
1473 self.report_warning('Video should already be available according to extracted info')
1474 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1475 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
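# Illustrative only: with params['wait_for_video'] = (60, 3600) and no usable
# release_timestamp, a random delay in [60, 3600] seconds is chosen; a known
# release time is clamped into the same [min_wait, max_wait] window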
1476
1477 wait_till = time.time() + diff
1478 try:
1479 while True:
1480 diff = wait_till - time.time()
1481 if diff <= 0:
1482 progress('')
1483 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1484 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1485 time.sleep(1)
1486 except KeyboardInterrupt:
1487 progress('')
1488 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1489 except BaseException as e:
1490 if not isinstance(e, ReExtractInfo):
1491 self.to_screen('')
1492 raise
1493
1494 @_handle_extraction_exceptions
1495 def __extract_info(self, url, ie, download, extra_info, process):
1496 ie_result = ie.extract(url)
1497 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1498 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1499 return
1500 if isinstance(ie_result, list):
1501 # Backwards compatibility: old IE result format
1502 ie_result = {
1503 '_type': 'compat_list',
1504 'entries': ie_result,
1505 }
1506 if extra_info.get('original_url'):
1507 ie_result.setdefault('original_url', extra_info['original_url'])
1508 self.add_default_extra_info(ie_result, ie, url)
1509 if process:
1510 self._wait_for_video(ie_result)
1511 return self.process_ie_result(ie_result, download, extra_info)
1512 else:
1513 return ie_result
1514
1515 def add_default_extra_info(self, ie_result, ie, url):
1516 if url is not None:
1517 self.add_extra_info(ie_result, {
1518 'webpage_url': url,
1519 'original_url': url,
1520 })
1521 webpage_url = ie_result.get('webpage_url')
1522 if webpage_url:
1523 self.add_extra_info(ie_result, {
1524 'webpage_url_basename': url_basename(webpage_url),
1525 'webpage_url_domain': get_domain(webpage_url),
1526 })
1527 if ie is not None:
1528 self.add_extra_info(ie_result, {
1529 'extractor': ie.IE_NAME,
1530 'extractor_key': ie.ie_key(),
1531 })
1532
1533 def process_ie_result(self, ie_result, download=True, extra_info=None):
1534 """
1535 Take the result of the ie (may be modified) and resolve all unresolved
1536 references (URLs, playlist items).
1537
1538 It will also download the videos if 'download'.
1539 Returns the resolved ie_result.
1540 """
1541 if extra_info is None:
1542 extra_info = {}
1543 result_type = ie_result.get('_type', 'video')
1544
1545 if result_type in ('url', 'url_transparent'):
1546 ie_result['url'] = sanitize_url(ie_result['url'])
1547 if ie_result.get('original_url'):
1548 extra_info.setdefault('original_url', ie_result['original_url'])
1549
1550 extract_flat = self.params.get('extract_flat', False)
1551 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1552 or extract_flat is True):
1553 info_copy = ie_result.copy()
1554 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1555 if ie and not ie_result.get('id'):
1556 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1557 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1558 self.add_extra_info(info_copy, extra_info)
1559 info_copy, _ = self.pre_process(info_copy)
1560 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1561 self._raise_pending_errors(info_copy)
1562 if self.params.get('force_write_download_archive', False):
1563 self.record_download_archive(info_copy)
1564 return ie_result
1565
1566 if result_type == 'video':
1567 self.add_extra_info(ie_result, extra_info)
1568 ie_result = self.process_video_result(ie_result, download=download)
1569 self._raise_pending_errors(ie_result)
1570 additional_urls = (ie_result or {}).get('additional_urls')
1571 if additional_urls:
1572 # TODO: Improve MetadataParserPP to allow setting a list
1573 if isinstance(additional_urls, str):
1574 additional_urls = [additional_urls]
1575 self.to_screen(
1576 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1577 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1578 ie_result['additional_entries'] = [
1579 self.extract_info(
1580 url, download, extra_info=extra_info,
1581 force_generic_extractor=self.params.get('force_generic_extractor'))
1582 for url in additional_urls
1583 ]
1584 return ie_result
1585 elif result_type == 'url':
1586 # We have to add extra_info to the results because it may be
1587 # contained in a playlist
1588 return self.extract_info(
1589 ie_result['url'], download,
1590 ie_key=ie_result.get('ie_key'),
1591 extra_info=extra_info)
1592 elif result_type == 'url_transparent':
1593 # Use the information from the embedding page
1594 info = self.extract_info(
1595 ie_result['url'], ie_key=ie_result.get('ie_key'),
1596 extra_info=extra_info, download=False, process=False)
1597
1598 # extract_info may return None when ignoreerrors is enabled and
1599 # extraction failed with an error, don't crash and return early
1600 # in this case
1601 if not info:
1602 return info
1603
1604 exempted_fields = {'_type', 'url', 'ie_key'}
1605 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1606 # For video clips, the id etc of the clip extractor should be used
1607 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1608
1609 new_result = info.copy()
1610 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1611
1612 # Extracted info may not be a video result (i.e.
1613 # info.get('_type', 'video') != 'video') but rather a url or
1614 # url_transparent. In such cases, outer metadata (from ie_result)
1615 # should be propagated to the inner one (info). For this to happen,
1616 # _type of info should be overridden with url_transparent. This
1617 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1618 if new_result.get('_type') == 'url':
1619 new_result['_type'] = 'url_transparent'
1620
1621 return self.process_ie_result(
1622 new_result, download=download, extra_info=extra_info)
1623 elif result_type in ('playlist', 'multi_video'):
1624 # Protect from infinite recursion due to recursively nested playlists
1625 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1626 webpage_url = ie_result['webpage_url']
1627 if webpage_url in self._playlist_urls:
1628 self.to_screen(
1629 '[download] Skipping already downloaded playlist: %s'
1630 % (ie_result.get('title') or ie_result.get('id')))
1631 return
1632
1633 self._playlist_level += 1
1634 self._playlist_urls.add(webpage_url)
1635 self._fill_common_fields(ie_result, False)
1636 self._sanitize_thumbnails(ie_result)
1637 try:
1638 return self.__process_playlist(ie_result, download)
1639 finally:
1640 self._playlist_level -= 1
1641 if not self._playlist_level:
1642 self._playlist_urls.clear()
1643 elif result_type == 'compat_list':
1644 self.report_warning(
1645 'Extractor %s returned a compat_list result. '
1646 'It needs to be updated.' % ie_result.get('extractor'))
1647
1648 def _fixup(r):
1649 self.add_extra_info(r, {
1650 'extractor': ie_result['extractor'],
1651 'webpage_url': ie_result['webpage_url'],
1652 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1653 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1654 'extractor_key': ie_result['extractor_key'],
1655 })
1656 return r
1657 ie_result['entries'] = [
1658 self.process_ie_result(_fixup(r), download, extra_info)
1659 for r in ie_result['entries']
1660 ]
1661 return ie_result
1662 else:
1663 raise Exception('Invalid result type: %s' % result_type)
1664
1665 def _ensure_dir_exists(self, path):
1666 return make_dir(path, self.report_error)
1667
1668 @staticmethod
1669 def _playlist_infodict(ie_result, **kwargs):
1670 return {
1671 **ie_result,
1672 'playlist': ie_result.get('title') or ie_result.get('id'),
1673 'playlist_id': ie_result.get('id'),
1674 'playlist_title': ie_result.get('title'),
1675 'playlist_uploader': ie_result.get('uploader'),
1676 'playlist_uploader_id': ie_result.get('uploader_id'),
1677 'playlist_index': 0,
1678 **kwargs,
1679 }
1680
1681 def __process_playlist(self, ie_result, download):
1682 """Process each entry in the playlist"""
1683 title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
1684 self.to_screen(f'[download] Downloading playlist: {title}')
1685
1686 all_entries = PlaylistEntries(self, ie_result)
1687 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1688
1689 lazy = self.params.get('lazy_playlist')
1690 if lazy:
1691 resolved_entries, n_entries = [], 'N/A'
1692 ie_result['requested_entries'], ie_result['entries'] = None, None
1693 else:
1694 entries = resolved_entries = list(entries)
1695 n_entries = len(resolved_entries)
1696 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1697 if not ie_result.get('playlist_count'):
1698 # Better to do this after potentially exhausting entries
1699 ie_result['playlist_count'] = all_entries.get_full_count()
1700
1701 _infojson_written = False
1702 write_playlist_files = self.params.get('allow_playlist_files', True)
1703 if write_playlist_files and self.params.get('list_thumbnails'):
1704 self.list_thumbnails(ie_result)
1705 if write_playlist_files and not self.params.get('simulate'):
1706 ie_copy = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1707 _infojson_written = self._write_info_json(
1708 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1709 if _infojson_written is None:
1710 return
1711 if self._write_description('playlist', ie_result,
1712 self.prepare_filename(ie_copy, 'pl_description')) is None:
1713 return
1714 # TODO: This should be passed to ThumbnailsConvertor if necessary
1715 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1716
1717 if lazy:
1718 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1719 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1720 elif self.params.get('playlistreverse'):
1721 entries.reverse()
1722 elif self.params.get('playlistrandom'):
1723 random.shuffle(entries)
1724
1725 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
1726 f'{format_field(ie_result, "playlist_count", " of %s")}')
1727
1728 failures = 0
1729 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1730 for i, (playlist_index, entry) in enumerate(entries):
1731 if lazy:
1732 resolved_entries.append((playlist_index, entry))
1733
1734 # TODO: Add auto-generated fields
1735 if not entry or self._match_entry(entry, incomplete=True) is not None:
1736 continue
1737
1738 self.to_screen('[download] Downloading video %s of %s' % (
1739 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1740
1741 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
1742 if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
1743 playlist_index = ie_result['requested_entries'][i]
1744
1745 entry_result = self.__process_iterable_entry(entry, download, {
1746 'n_entries': int_or_none(n_entries),
1747 '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
1748 'playlist_count': ie_result.get('playlist_count'),
1749 'playlist_index': playlist_index,
1750 'playlist_autonumber': i + 1,
1751 'playlist': title,
1752 'playlist_id': ie_result.get('id'),
1753 'playlist_title': ie_result.get('title'),
1754 'playlist_uploader': ie_result.get('uploader'),
1755 'playlist_uploader_id': ie_result.get('uploader_id'),
1756 'extractor': ie_result['extractor'],
1757 'webpage_url': ie_result['webpage_url'],
1758 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1759 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1760 'extractor_key': ie_result['extractor_key'],
1761 })
1762 if not entry_result:
1763 failures += 1
1764 if failures >= max_failures:
1765 self.report_error(
1766 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
1767 break
1768 resolved_entries[i] = (playlist_index, entry_result)
1769
1770 # Update with processed data
1771 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1772
1773 # Write the updated info to json
1774 if _infojson_written is True and self._write_info_json(
1775 'updated playlist', ie_result,
1776 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1777 return
1778
1779 ie_result = self.run_all_pps('playlist', ie_result)
1780 self.to_screen(f'[download] Finished downloading playlist: {title}')
1781 return ie_result
1782
1783 @_handle_extraction_exceptions
1784 def __process_iterable_entry(self, entry, download, extra_info):
1785 return self.process_ie_result(
1786 entry, download=download, extra_info=extra_info)
1787
1788 def _build_format_filter(self, filter_spec):
1789 " Returns a function to filter the formats according to the filter_spec "
1790
1791 OPERATORS = {
1792 '<': operator.lt,
1793 '<=': operator.le,
1794 '>': operator.gt,
1795 '>=': operator.ge,
1796 '=': operator.eq,
1797 '!=': operator.ne,
1798 }
1799 operator_rex = re.compile(r'''(?x)\s*
1800 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1801 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1802 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1803 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1804 m = operator_rex.fullmatch(filter_spec)
1805 if m:
1806 try:
1807 comparison_value = int(m.group('value'))
1808 except ValueError:
1809 comparison_value = parse_filesize(m.group('value'))
1810 if comparison_value is None:
1811 comparison_value = parse_filesize(m.group('value') + 'B')
1812 if comparison_value is None:
1813 raise ValueError(
1814 'Invalid value %r in format specification %r' % (
1815 m.group('value'), filter_spec))
1816 op = OPERATORS[m.group('op')]
1817
1818 if not m:
1819 STR_OPERATORS = {
1820 '=': operator.eq,
1821 '^=': lambda attr, value: attr.startswith(value),
1822 '$=': lambda attr, value: attr.endswith(value),
1823 '*=': lambda attr, value: value in attr,
1824 '~=': lambda attr, value: value.search(attr) is not None
1825 }
1826 str_operator_rex = re.compile(r'''(?x)\s*
1827 (?P<key>[a-zA-Z0-9._-]+)\s*
1828 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1829 (?P<quote>["'])?
1830 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1831 (?(quote)(?P=quote))\s*
1832 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1833 m = str_operator_rex.fullmatch(filter_spec)
1834 if m:
1835 if m.group('op') == '~=':
1836 comparison_value = re.compile(m.group('value'))
1837 else:
1838 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1839 str_op = STR_OPERATORS[m.group('op')]
1840 if m.group('negation'):
1841 op = lambda attr, value: not str_op(attr, value)
1842 else:
1843 op = str_op
1844
1845 if not m:
1846 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1847
1848 def _filter(f):
1849 actual_value = f.get(m.group('key'))
1850 if actual_value is None:
1851 return m.group('none_inclusive')
1852 return op(actual_value, comparison_value)
1853 return _filter
1854
1855 def _check_formats(self, formats):
1856 for f in formats:
1857 self.to_screen('[info] Testing format %s' % f['format_id'])
1858 path = self.get_output_path('temp')
1859 if not self._ensure_dir_exists(f'{path}/'):
1860 continue
1861 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1862 temp_file.close()
1863 try:
1864 success, _ = self.dl(temp_file.name, f, test=True)
1865 except (DownloadError, OSError, ValueError) + network_exceptions:
1866 success = False
1867 finally:
1868 if os.path.exists(temp_file.name):
1869 try:
1870 os.remove(temp_file.name)
1871 except OSError:
1872 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1873 if success:
1874 yield f
1875 else:
1876 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1877
1878 def _default_format_spec(self, info_dict, download=True):
1879
1880 def can_merge():
1881 merger = FFmpegMergerPP(self)
1882 return merger.available and merger.can_merge()
1883
1884 prefer_best = (
1885 not self.params.get('simulate')
1886 and download
1887 and (
1888 not can_merge()
1889 or info_dict.get('is_live') and not self.params.get('live_from_start')
1890 or self.params['outtmpl']['default'] == '-'))
1891 compat = (
1892 prefer_best
1893 or self.params.get('allow_multiple_audio_streams', False)
1894 or 'format-spec' in self.params['compat_opts'])
1895
1896 return (
1897 'best/bestvideo+bestaudio' if prefer_best
1898 else 'bestvideo*+bestaudio/best' if not compat
1899 else 'bestvideo+bestaudio/best')
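# Illustrative only: for a regular download (merger available, not live,
# not to stdout, no compat options) this returns 'bestvideo*+bestaudio/best';
# when merging is not possible it falls back to 'best/bestvideo+bestaudio'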
1900
1901 def build_format_selector(self, format_spec):
1902 def syntax_error(note, start):
1903 message = (
1904 'Invalid format specification: '
1905 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
1906 return SyntaxError(message)
1907
1908 PICKFIRST = 'PICKFIRST'
1909 MERGE = 'MERGE'
1910 SINGLE = 'SINGLE'
1911 GROUP = 'GROUP'
1912 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
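# Illustrative only: a spec such as 'bv[height<=1080]+ba/b' is parsed into
# nested FormatSelector tuples, roughly
#   PICKFIRST((MERGE((SINGLE 'bv' {height<=1080}, SINGLE 'ba')), SINGLE 'b'))
# which _build_selector_function below turns into a generator over formats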
1913
1914 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1915 'video': self.params.get('allow_multiple_video_streams', False)}
1916
1917 check_formats = self.params.get('check_formats') == 'selected'
1918
1919 def _parse_filter(tokens):
1920 filter_parts = []
1921 for type, string, start, _, _ in tokens:
1922 if type == tokenize.OP and string == ']':
1923 return ''.join(filter_parts)
1924 else:
1925 filter_parts.append(string)
1926
1927 def _remove_unused_ops(tokens):
1928 # Remove operators that we don't use and join them with the surrounding strings
1929 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1930 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1931 last_string, last_start, last_end, last_line = None, None, None, None
1932 for type, string, start, end, line in tokens:
1933 if type == tokenize.OP and string == '[':
1934 if last_string:
1935 yield tokenize.NAME, last_string, last_start, last_end, last_line
1936 last_string = None
1937 yield type, string, start, end, line
1938 # everything inside brackets will be handled by _parse_filter
1939 for type, string, start, end, line in tokens:
1940 yield type, string, start, end, line
1941 if type == tokenize.OP and string == ']':
1942 break
1943 elif type == tokenize.OP and string in ALLOWED_OPS:
1944 if last_string:
1945 yield tokenize.NAME, last_string, last_start, last_end, last_line
1946 last_string = None
1947 yield type, string, start, end, line
1948 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1949 if not last_string:
1950 last_string = string
1951 last_start = start
1952 last_end = end
1953 else:
1954 last_string += string
1955 if last_string:
1956 yield tokenize.NAME, last_string, last_start, last_end, last_line
1957
1958 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1959 selectors = []
1960 current_selector = None
1961 for type, string, start, _, _ in tokens:
1962 # tokenize emits ENCODING as the very first token; skip it
1963 if type == getattr(tokenize, 'ENCODING', None):
1964 continue
1965 elif type in [tokenize.NAME, tokenize.NUMBER]:
1966 current_selector = FormatSelector(SINGLE, string, [])
1967 elif type == tokenize.OP:
1968 if string == ')':
1969 if not inside_group:
1970 # ')' will be handled by the parentheses group
1971 tokens.restore_last_token()
1972 break
1973 elif inside_merge and string in ['/', ',']:
1974 tokens.restore_last_token()
1975 break
1976 elif inside_choice and string == ',':
1977 tokens.restore_last_token()
1978 break
1979 elif string == ',':
1980 if not current_selector:
1981 raise syntax_error('"," must follow a format selector', start)
1982 selectors.append(current_selector)
1983 current_selector = None
1984 elif string == '/':
1985 if not current_selector:
1986 raise syntax_error('"/" must follow a format selector', start)
1987 first_choice = current_selector
1988 second_choice = _parse_format_selection(tokens, inside_choice=True)
1989 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1990 elif string == '[':
1991 if not current_selector:
1992 current_selector = FormatSelector(SINGLE, 'best', [])
1993 format_filter = _parse_filter(tokens)
1994 current_selector.filters.append(format_filter)
1995 elif string == '(':
1996 if current_selector:
1997 raise syntax_error('Unexpected "("', start)
1998 group = _parse_format_selection(tokens, inside_group=True)
1999 current_selector = FormatSelector(GROUP, group, [])
2000 elif string == '+':
2001 if not current_selector:
2002 raise syntax_error('Unexpected "+"', start)
2003 selector_1 = current_selector
2004 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2005 if not selector_2:
2006 raise syntax_error('Expected a selector', start)
2007 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2008 else:
2009 raise syntax_error(f'Operator not recognized: "{string}"', start)
2010 elif type == tokenize.ENDMARKER:
2011 break
2012 if current_selector:
2013 selectors.append(current_selector)
2014 return selectors
2015
2016 def _merge(formats_pair):
2017 format_1, format_2 = formats_pair
2018
2019 formats_info = []
2020 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2021 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2022
2023 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2024 get_no_more = {'video': False, 'audio': False}
2025 for (i, fmt_info) in enumerate(formats_info):
2026 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2027 formats_info.pop(i)
2028 continue
2029 for aud_vid in ['audio', 'video']:
2030 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2031 if get_no_more[aud_vid]:
2032 formats_info.pop(i)
2033 break
2034 get_no_more[aud_vid] = True
2035
2036 if len(formats_info) == 1:
2037 return formats_info[0]
2038
2039 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2040 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2041
2042 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2043 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2044
2045 output_ext = self.params.get('merge_output_format')
2046 if not output_ext:
2047 if the_only_video:
2048 output_ext = the_only_video['ext']
2049 elif the_only_audio and not video_fmts:
2050 output_ext = the_only_audio['ext']
2051 else:
2052 output_ext = 'mkv'
2053
2054 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2055
2056 new_dict = {
2057 'requested_formats': formats_info,
2058 'format': '+'.join(filtered('format')),
2059 'format_id': '+'.join(filtered('format_id')),
2060 'ext': output_ext,
2061 'protocol': '+'.join(map(determine_protocol, formats_info)),
2062 'language': '+'.join(orderedSet(filtered('language'))) or None,
2063 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2064 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2065 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2066 }
2067
2068 if the_only_video:
2069 new_dict.update({
2070 'width': the_only_video.get('width'),
2071 'height': the_only_video.get('height'),
2072 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2073 'fps': the_only_video.get('fps'),
2074 'dynamic_range': the_only_video.get('dynamic_range'),
2075 'vcodec': the_only_video.get('vcodec'),
2076 'vbr': the_only_video.get('vbr'),
2077 'stretched_ratio': the_only_video.get('stretched_ratio'),
2078 })
2079
2080 if the_only_audio:
2081 new_dict.update({
2082 'acodec': the_only_audio.get('acodec'),
2083 'abr': the_only_audio.get('abr'),
2084 'asr': the_only_audio.get('asr'),
2085 })
2086
2087 return new_dict
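# Illustrative only: merging a video-only with an audio-only format yields a
# synthetic entry along the lines of
#   {'format_id': '137+140', 'ext': 'mp4', 'protocol': 'https+https',
#    'requested_formats': [<video fmt>, <audio fmt>], ...}
# which the FFmpeg merger postprocessor later muxes into a single file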
2088
2089 def _check_formats(formats):
2090 if not check_formats:
2091 yield from formats
2092 return
2093 yield from self._check_formats(formats)
2094
2095 def _build_selector_function(selector):
2096 if isinstance(selector, list): # ,
2097 fs = [_build_selector_function(s) for s in selector]
2098
2099 def selector_function(ctx):
2100 for f in fs:
2101 yield from f(ctx)
2102 return selector_function
2103
2104 elif selector.type == GROUP: # ()
2105 selector_function = _build_selector_function(selector.selector)
2106
2107 elif selector.type == PICKFIRST: # /
2108 fs = [_build_selector_function(s) for s in selector.selector]
2109
2110 def selector_function(ctx):
2111 for f in fs:
2112 picked_formats = list(f(ctx))
2113 if picked_formats:
2114 return picked_formats
2115 return []
2116
2117 elif selector.type == MERGE: # +
2118 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2119
2120 def selector_function(ctx):
2121 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2122 yield _merge(pair)
2123
2124 elif selector.type == SINGLE: # atom
2125 format_spec = selector.selector or 'best'
2126
2127 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2128 if format_spec == 'all':
2129 def selector_function(ctx):
2130 yield from _check_formats(ctx['formats'][::-1])
2131 elif format_spec == 'mergeall':
2132 def selector_function(ctx):
2133 formats = list(_check_formats(
2134 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2135 if not formats:
2136 return
2137 merged_format = formats[-1]
2138 for f in formats[-2::-1]:
2139 merged_format = _merge((merged_format, f))
2140 yield merged_format
2141
2142 else:
2143 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2144 mobj = re.match(
2145 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2146 format_spec)
2147 if mobj is not None:
2148 format_idx = int_or_none(mobj.group('n'), default=1)
2149 format_reverse = mobj.group('bw')[0] == 'b'
2150 format_type = (mobj.group('type') or [None])[0]
2151 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2152 format_modified = mobj.group('mod') is not None
2153
2154 format_fallback = not format_type and not format_modified # for b, w
2155 _filter_f = (
2156 (lambda f: f.get('%scodec' % format_type) != 'none')
2157 if format_type and format_modified # bv*, ba*, wv*, wa*
2158 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2159 if format_type # bv, ba, wv, wa
2160 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2161 if not format_modified # b, w
2162 else lambda f: True) # b*, w*
2163 filter_f = lambda f: _filter_f(f) and (
2164 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2165 else:
2166 if format_spec in self._format_selection_exts['audio']:
2167 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2168 elif format_spec in self._format_selection_exts['video']:
2169 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2170 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2171 elif format_spec in self._format_selection_exts['storyboards']:
2172 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2173 else:
2174 filter_f = lambda f: f.get('format_id') == format_spec # id
2175
2176 def selector_function(ctx):
2177 formats = list(ctx['formats'])
2178 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2179 if not matches:
2180 if format_fallback and ctx['incomplete_formats']:
2181 # for extractors with incomplete formats (audio-only (soundcloud)
2182 # or video-only (imgur)), best/worst will fall back to the
2183 # best/worst {video,audio}-only format
2184 matches = formats
2185 elif separate_fallback and not ctx['has_merged_format']:
2186 # for compatibility with youtube-dl when there is no pre-merged format
2187 matches = list(filter(separate_fallback, formats))
2188 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2189 try:
2190 yield matches[format_idx - 1]
2191 except LazyList.IndexError:
2192 return
2193
2194 filters = [self._build_format_filter(f) for f in selector.filters]
2195
2196 def final_selector(ctx):
2197 ctx_copy = dict(ctx)
2198 for _filter in filters:
2199 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2200 return selector_function(ctx_copy)
2201 return final_selector
2202
2203 stream = io.BytesIO(format_spec.encode())
2204 try:
2205 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2206 except tokenize.TokenError:
2207 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2208
2209 class TokenIterator:
2210 def __init__(self, tokens):
2211 self.tokens = tokens
2212 self.counter = 0
2213
2214 def __iter__(self):
2215 return self
2216
2217 def __next__(self):
2218 if self.counter >= len(self.tokens):
2219 raise StopIteration()
2220 value = self.tokens[self.counter]
2221 self.counter += 1
2222 return value
2223
2224 next = __next__
2225
2226 def restore_last_token(self):
2227 self.counter -= 1
2228
2229 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2230 return _build_selector_function(parsed_selector)
2231
2232 def _calc_headers(self, info_dict):
2233 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2234
2235 cookies = self._calc_cookies(info_dict['url'])
2236 if cookies:
2237 res['Cookie'] = cookies
2238
2239 if 'X-Forwarded-For' not in res:
2240 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2241 if x_forwarded_for_ip:
2242 res['X-Forwarded-For'] = x_forwarded_for_ip
2243
2244 return res
2245
2246 def _calc_cookies(self, url):
2247 pr = sanitized_Request(url)
2248 self.cookiejar.add_cookie_header(pr)
2249 return pr.get_header('Cookie')
2250
2251 def _sort_thumbnails(self, thumbnails):
2252 thumbnails.sort(key=lambda t: (
2253 t.get('preference') if t.get('preference') is not None else -1,
2254 t.get('width') if t.get('width') is not None else -1,
2255 t.get('height') if t.get('height') is not None else -1,
2256 t.get('id') if t.get('id') is not None else '',
2257 t.get('url')))
2258
2259 def _sanitize_thumbnails(self, info_dict):
2260 thumbnails = info_dict.get('thumbnails')
2261 if thumbnails is None:
2262 thumbnail = info_dict.get('thumbnail')
2263 if thumbnail:
2264 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2265 if not thumbnails:
2266 return
2267
2268 def check_thumbnails(thumbnails):
2269 for t in thumbnails:
2270 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2271 try:
2272 self.urlopen(HEADRequest(t['url']))
2273 except network_exceptions as err:
2274 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2275 continue
2276 yield t
2277
2278 self._sort_thumbnails(thumbnails)
2279 for i, t in enumerate(thumbnails):
2280 if t.get('id') is None:
2281 t['id'] = '%d' % i
2282 if t.get('width') and t.get('height'):
2283 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2284 t['url'] = sanitize_url(t['url'])
2285
2286 if self.params.get('check_formats') is True:
2287 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2288 else:
2289 info_dict['thumbnails'] = thumbnails
2290
2291 def _fill_common_fields(self, info_dict, is_video=True):
2292 # TODO: move sanitization here
2293 if is_video:
2294 # playlists are allowed to lack "title"
2295 title = info_dict.get('title', NO_DEFAULT)
2296 if title is NO_DEFAULT:
2297 raise ExtractorError('Missing "title" field in extractor result',
2298 video_id=info_dict['id'], ie=info_dict['extractor'])
2299 info_dict['fulltitle'] = title
2300 if not title:
2301 if title == '':
2302 self.write_debug('Extractor gave empty title. Creating a generic title')
2303 else:
2304 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2305 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2306
2307 if info_dict.get('duration') is not None:
2308 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2309
2310 for ts_key, date_key in (
2311 ('timestamp', 'upload_date'),
2312 ('release_timestamp', 'release_date'),
2313 ('modified_timestamp', 'modified_date'),
2314 ):
2315 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2316 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2317 # see http://bugs.python.org/issue1646728)
2318 with contextlib.suppress(ValueError, OverflowError, OSError):
2319 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2320 info_dict[date_key] = upload_date.strftime('%Y%m%d')
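# Illustrative only: timestamp 1658188800 yields upload_date '20220719';
# dates are derived in UTC, so they may differ from the uploader's local date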
2321
2322 live_keys = ('is_live', 'was_live')
2323 live_status = info_dict.get('live_status')
2324 if live_status is None:
2325 for key in live_keys:
2326 if info_dict.get(key) is False:
2327 continue
2328 if info_dict.get(key):
2329 live_status = key
2330 break
2331 if all(info_dict.get(key) is False for key in live_keys):
2332 live_status = 'not_live'
2333 if live_status:
2334 info_dict['live_status'] = live_status
2335 for key in live_keys:
2336 if info_dict.get(key) is None:
2337 info_dict[key] = (live_status == key)
2338
2339 # Auto generate title fields corresponding to the *_number fields when missing
2340 # in order to always have clean titles. This is very common for TV series.
2341 for field in ('chapter', 'season', 'episode'):
2342 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2343 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2344
2345 def _raise_pending_errors(self, info):
2346 err = info.pop('__pending_error', None)
2347 if err:
2348 self.report_error(err, tb=False)
2349
2350 def process_video_result(self, info_dict, download=True):
2351 assert info_dict.get('_type', 'video') == 'video'
2352 self._num_videos += 1
2353
2354 if 'id' not in info_dict:
2355 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2356 elif not info_dict.get('id'):
2357 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2358
2359 def report_force_conversion(field, field_not, conversion):
2360 self.report_warning(
2361 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2362 % (field, field_not, conversion))
2363
2364 def sanitize_string_field(info, string_field):
2365 field = info.get(string_field)
2366 if field is None or isinstance(field, str):
2367 return
2368 report_force_conversion(string_field, 'a string', 'string')
2369 info[string_field] = str(field)
2370
2371 def sanitize_numeric_fields(info):
2372 for numeric_field in self._NUMERIC_FIELDS:
2373 field = info.get(numeric_field)
2374 if field is None or isinstance(field, (int, float)):
2375 continue
2376 report_force_conversion(numeric_field, 'numeric', 'int')
2377 info[numeric_field] = int_or_none(field)
2378
2379 sanitize_string_field(info_dict, 'id')
2380 sanitize_numeric_fields(info_dict)
2381 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2382 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2383 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2384 self.report_warning('"duration" field is negative, there is an error in extractor')
2385
2386 chapters = info_dict.get('chapters') or []
2387 if chapters and chapters[0].get('start_time'):
2388 chapters.insert(0, {'start_time': 0})
2389
2390 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2391 for idx, (prev, current, next_) in enumerate(zip(
2392 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2393 if current.get('start_time') is None:
2394 current['start_time'] = prev.get('end_time')
2395 if not current.get('end_time'):
2396 current['end_time'] = next_.get('start_time')
2397 if not current.get('title'):
2398 current['title'] = f'<Untitled Chapter {idx}>'
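# Illustrative only: with duration 100 and chapters
#   [{'start_time': 10}, {'start_time': 40, 'title': 'B'}]
# the pass above prepends {'start_time': 0} and fills the gaps, giving
#   0-10 '<Untitled Chapter 1>', 10-40 '<Untitled Chapter 2>', 40-100 'B'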
2399
2400 if 'playlist' not in info_dict:
2401 # It isn't part of a playlist
2402 info_dict['playlist'] = None
2403 info_dict['playlist_index'] = None
2404
2405 self._sanitize_thumbnails(info_dict)
2406
2407 thumbnail = info_dict.get('thumbnail')
2408 thumbnails = info_dict.get('thumbnails')
2409 if thumbnail:
2410 info_dict['thumbnail'] = sanitize_url(thumbnail)
2411 elif thumbnails:
2412 info_dict['thumbnail'] = thumbnails[-1]['url']
2413
2414 if info_dict.get('display_id') is None and 'id' in info_dict:
2415 info_dict['display_id'] = info_dict['id']
2416
2417 self._fill_common_fields(info_dict)
2418
2419 for cc_kind in ('subtitles', 'automatic_captions'):
2420 cc = info_dict.get(cc_kind)
2421 if cc:
2422 for _, subtitle in cc.items():
2423 for subtitle_format in subtitle:
2424 if subtitle_format.get('url'):
2425 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2426 if subtitle_format.get('ext') is None:
2427 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2428
2429 automatic_captions = info_dict.get('automatic_captions')
2430 subtitles = info_dict.get('subtitles')
2431
2432 info_dict['requested_subtitles'] = self.process_subtitles(
2433 info_dict['id'], subtitles, automatic_captions)
2434
2435 if info_dict.get('formats') is None:
2436 # There's only one format available
2437 formats = [info_dict]
2438 else:
2439 formats = info_dict['formats']
2440
2441 # or None ensures --clean-infojson removes it
2442 info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
2443 if not self.params.get('allow_unplayable_formats'):
2444 formats = [f for f in formats if not f.get('has_drm')]
2445 if info_dict['_has_drm'] and all(
2446 f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2447 self.report_warning(
2448 'This video is DRM protected and only images are available for download. '
2449 'Use --list-formats to see them')
2450
2451 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2452 if not get_from_start:
2453 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2454 if info_dict.get('is_live') and formats:
2455 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2456 if get_from_start and not formats:
2457 self.raise_no_formats(info_dict, msg=(
2458 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2459 'If you want to download from the current time, use --no-live-from-start'))
2460
2461 if not formats:
2462 self.raise_no_formats(info_dict)
2463
2464 def is_wellformed(f):
2465 url = f.get('url')
2466 if not url:
2467 self.report_warning(
2468 '"url" field is missing or empty - skipping format, '
2469 'there is an error in the extractor')
2470 return False
2471 if isinstance(url, bytes):
2472 sanitize_string_field(f, 'url')
2473 return True
2474
2475 # Filter out malformed formats for better extraction robustness
2476 formats = list(filter(is_wellformed, formats))
2477
2478 formats_dict = {}
2479
2480 # We check that all the formats have the format and format_id fields
2481 for i, format in enumerate(formats):
2482 sanitize_string_field(format, 'format_id')
2483 sanitize_numeric_fields(format)
2484 format['url'] = sanitize_url(format['url'])
2485 if not format.get('format_id'):
2486 format['format_id'] = str(i)
2487 else:
2488 # Sanitize format_id from characters used in format selector expression
2489 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2490 format_id = format['format_id']
2491 if format_id not in formats_dict:
2492 formats_dict[format_id] = []
2493 formats_dict[format_id].append(format)
2494
2495 # Make sure all formats have unique format_id
2496 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2497 for format_id, ambiguous_formats in formats_dict.items():
2498 ambiguous_id = len(ambiguous_formats) > 1
2499 for i, format in enumerate(ambiguous_formats):
2500 if ambiguous_id:
2501 format['format_id'] = '%s-%d' % (format_id, i)
2502 if format.get('ext') is None:
2503 format['ext'] = determine_ext(format['url']).lower()
2504 # Ensure there is no conflict between id and ext in format selection
2505 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2506 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2507 format['format_id'] = 'f%s' % format['format_id']
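# Illustrative only: 'hls,720p' is sanitized to 'hls_720p'; duplicated ids
# become 'hls_720p-0', 'hls_720p-1', ...; and an id that collides with a
# common extension (while differing from the format's own ext) is prefixed
# with 'f', e.g. 'mp4' -> 'fmp4'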
2508
2509 for i, format in enumerate(formats):
2510 if format.get('format') is None:
2511 format['format'] = '{id} - {res}{note}'.format(
2512 id=format['format_id'],
2513 res=self.format_resolution(format),
2514 note=format_field(format, 'format_note', ' (%s)'),
2515 )
2516 if format.get('protocol') is None:
2517 format['protocol'] = determine_protocol(format)
2518 if format.get('resolution') is None:
2519 format['resolution'] = self.format_resolution(format, default=None)
2520 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2521 format['dynamic_range'] = 'SDR'
2522 if (info_dict.get('duration') and format.get('tbr')
2523 and not format.get('filesize') and not format.get('filesize_approx')):
2524 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
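# e.g. (illustrative) 60 s at tbr 1000: 60 * 1000 * (1024 / 8) = 7,680,000 bytes (~7.3 MiB)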
2525
2526 # Add HTTP headers, so that external programs can use them from the
2527 # json output
2528 full_format_info = info_dict.copy()
2529 full_format_info.update(format)
2530 format['http_headers'] = self._calc_headers(full_format_info)
2531 # Remove private housekeeping stuff
2532 if '__x_forwarded_for_ip' in info_dict:
2533 del info_dict['__x_forwarded_for_ip']
2534
2535 if self.params.get('check_formats') is True:
2536 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2537
2538 if not formats or formats[0] is not info_dict:
2539 # only set the 'formats' field if the original info_dict lists it;
2540 # otherwise we would end up with a circular reference: the first (and only)
2541 # element in the 'formats' field of info_dict would be info_dict itself,
2542 # which can't be exported to json
2543 info_dict['formats'] = formats
2544
2545 info_dict, _ = self.pre_process(info_dict)
2546
2547 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2548 return info_dict
2549
2550 self.post_extract(info_dict)
2551 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2552
2553 # The pre-processors may have modified the formats
2554 formats = info_dict.get('formats', [info_dict])
2555
2556 list_only = self.params.get('simulate') is None and (
2557 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2558 interactive_format_selection = not list_only and self.format_selector == '-'
2559 if self.params.get('list_thumbnails'):
2560 self.list_thumbnails(info_dict)
2561 if self.params.get('listsubtitles'):
2562 if 'automatic_captions' in info_dict:
2563 self.list_subtitles(
2564 info_dict['id'], automatic_captions, 'automatic captions')
2565 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2566 if self.params.get('listformats') or interactive_format_selection:
2567 self.list_formats(info_dict)
2568 if list_only:
2569 # Without this printing, -F --print-json will not work
2570 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2571 return info_dict
2572
2573 format_selector = self.format_selector
2574 if format_selector is None:
2575 req_format = self._default_format_spec(info_dict, download=download)
2576 self.write_debug('Default format spec: %s' % req_format)
2577 format_selector = self.build_format_selector(req_format)
2578
2579 while True:
2580 if interactive_format_selection:
2581 req_format = input(
2582 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2583 try:
2584 format_selector = self.build_format_selector(req_format)
2585 except SyntaxError as err:
2586 self.report_error(err, tb=False, is_error=False)
2587 continue
2588
2589 formats_to_download = list(format_selector({
2590 'formats': formats,
2591 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2592 'incomplete_formats': (
2593 # All formats are video-only or
2594 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2595 # all formats are audio-only
2596 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2597 }))
2598 if interactive_format_selection and not formats_to_download:
2599 self.report_error('Requested format is not available', tb=False, is_error=False)
2600 continue
2601 break
2602
2603 if not formats_to_download:
2604 if not self.params.get('ignore_no_formats_error'):
2605 raise ExtractorError(
2606 'Requested format is not available. Use --list-formats for a list of available formats',
2607 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2608 self.report_warning('Requested format is not available')
2609 # Process what we can, even without any available formats.
2610 formats_to_download = [{}]
2611
2612 requested_ranges = self.params.get('download_ranges')
2613 if requested_ranges:
2614 requested_ranges = tuple(requested_ranges(info_dict, self))
2615
2616 best_format, downloaded_formats = formats_to_download[-1], []
2617 if download:
2618 if best_format:
2619 def to_screen(*msg):
2620 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2621
2622 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2623 (f['format_id'] for f in formats_to_download))
2624 if requested_ranges:
2625 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2626 (f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))
2627 max_downloads_reached = False
2628
2629 for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):
2630 new_info = self._copy_infodict(info_dict)
2631 new_info.update(fmt)
2632 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2633 if chapter or offset:
2634 new_info.update({
2635 'section_start': offset + chapter.get('start_time', 0),
2636 'section_end': offset + min(chapter.get('end_time', duration), duration),
2637 'section_title': chapter.get('title'),
2638 'section_number': chapter.get('index'),
2639 })
2640 downloaded_formats.append(new_info)
2641 try:
2642 self.process_info(new_info)
2643 except MaxDownloadsReached:
2644 max_downloads_reached = True
2645 self._raise_pending_errors(new_info)
2646 # Remove copied info
2647 for key, val in tuple(new_info.items()):
2648 if info_dict.get(key) == val:
2649 new_info.pop(key)
2650 if max_downloads_reached:
2651 break
2652
2653 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2654 assert write_archive.issubset({True, False, 'ignore'})
2655 if True in write_archive and False not in write_archive:
2656 self.record_download_archive(info_dict)
2657
2658 info_dict['requested_downloads'] = downloaded_formats
2659 info_dict = self.run_all_pps('after_video', info_dict)
2660 if max_downloads_reached:
2661 raise MaxDownloadsReached()
2662
2663 # We update the info dict with the selected best quality format (backwards compatibility)
2664 info_dict.update(best_format)
2665 return info_dict
2666
2667 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2668 """Select the requested subtitles and their format"""
2669 available_subs, normal_sub_langs = {}, []
2670 if normal_subtitles and self.params.get('writesubtitles'):
2671 available_subs.update(normal_subtitles)
2672 normal_sub_langs = tuple(normal_subtitles.keys())
2673 if automatic_captions and self.params.get('writeautomaticsub'):
2674 for lang, cap_info in automatic_captions.items():
2675 if lang not in available_subs:
2676 available_subs[lang] = cap_info
2677
2678 if ((not self.params.get('writesubtitles')
2679 and not self.params.get('writeautomaticsub'))
2680 or not available_subs):
2681 return None
2682
2683 all_sub_langs = tuple(available_subs.keys())
2684 if self.params.get('allsubtitles', False):
2685 requested_langs = all_sub_langs
2686 elif self.params.get('subtitleslangs', False):
2687 # A list is used so that the order of languages will be the same as
2688 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2689 requested_langs = []
2690 for lang_re in self.params.get('subtitleslangs'):
2691 discard = lang_re[0] == '-'
2692 if discard:
2693 lang_re = lang_re[1:]
2694 if lang_re == 'all':
2695 if discard:
2696 requested_langs = []
2697 else:
2698 requested_langs.extend(all_sub_langs)
2699 continue
2700 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2701 if discard:
2702 for lang in current_langs:
2703 while lang in requested_langs:
2704 requested_langs.remove(lang)
2705 else:
2706 requested_langs.extend(current_langs)
2707 requested_langs = orderedSet(requested_langs)
2708 elif normal_sub_langs:
2709 requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
2710 else:
2711 requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
2712 if requested_langs:
2713 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
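# Illustrative only: subtitleslangs ['en.*', '-en-GB'] first collects every
# available language matching en.* and then discards en-GB from the selection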
2714
2715 formats_query = self.params.get('subtitlesformat', 'best')
2716 formats_preference = formats_query.split('/') if formats_query else []
2717 subs = {}
2718 for lang in requested_langs:
2719 formats = available_subs.get(lang)
2720 if formats is None:
2721 self.report_warning(f'{lang} subtitles not available for {video_id}')
2722 continue
2723 for ext in formats_preference:
2724 if ext == 'best':
2725 f = formats[-1]
2726 break
2727 matches = list(filter(lambda f: f['ext'] == ext, formats))
2728 if matches:
2729 f = matches[-1]
2730 break
2731 else:
2732 f = formats[-1]
2733 self.report_warning(
2734 'No subtitle format found matching "%s" for language %s, '
2735 'using %s' % (formats_query, lang, f['ext']))
2736 subs[lang] = f
2737 return subs
2738
2739 def _forceprint(self, key, info_dict):
2740 if info_dict is None:
2741 return
2742 info_copy = info_dict.copy()
2743 info_copy['formats_table'] = self.render_formats_table(info_dict)
2744 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2745 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2746 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2747
2748 def format_tmpl(tmpl):
2749 mobj = re.match(r'\w+(=?)$', tmpl)
2750 if mobj and mobj.group(1):
2751 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2752 elif mobj:
2753 return f'%({tmpl})s'
2754 return tmpl
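# Illustrative only: 'title' -> '%(title)s', 'title=' -> 'title = %(title)r';
# anything else is passed through unchanged as an output template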
2755
2756 for tmpl in self.params['forceprint'].get(key, []):
2757 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2758
2759 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2760 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
2761 tmpl = format_tmpl(tmpl)
2762 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2763 if self._ensure_dir_exists(filename):
2764 with open(filename, 'a', encoding='utf-8') as f:
2765 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2766
2767 def __forced_printings(self, info_dict, filename, incomplete):
2768 def print_mandatory(field, actual_field=None):
2769 if actual_field is None:
2770 actual_field = field
2771 if (self.params.get('force%s' % field, False)
2772 and (not incomplete or info_dict.get(actual_field) is not None)):
2773 self.to_stdout(info_dict[actual_field])
2774
2775 def print_optional(field):
2776 if (self.params.get('force%s' % field, False)
2777 and info_dict.get(field) is not None):
2778 self.to_stdout(info_dict[field])
2779
2780 info_dict = info_dict.copy()
2781 if filename is not None:
2782 info_dict['filename'] = filename
2783 if info_dict.get('requested_formats') is not None:
2784 # For RTMP URLs, also include the playpath
2785 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2786 elif info_dict.get('url'):
2787 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2788
2789 if (self.params.get('forcejson')
2790 or self.params['forceprint'].get('video')
2791 or self.params['print_to_file'].get('video')):
2792 self.post_extract(info_dict)
2793 self._forceprint('video', info_dict)
2794
2795 print_mandatory('title')
2796 print_mandatory('id')
2797 print_mandatory('url', 'urls')
2798 print_optional('thumbnail')
2799 print_optional('description')
2800 print_optional('filename')
2801 if self.params.get('forceduration') and info_dict.get('duration') is not None:
2802 self.to_stdout(formatSeconds(info_dict['duration']))
2803 print_mandatory('format')
2804
2805 if self.params.get('forcejson'):
2806 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2807
2808 def dl(self, name, info, subtitle=False, test=False):
2809 if not info.get('url'):
2810 self.raise_no_formats(info, True)
2811
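# In test mode, only a small initial chunk is downloaded to probe the format;
# progress output, .part files and .ytdl resume files are all suppressed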
2812 if test:
2813 verbose = self.params.get('verbose')
2814 params = {
2815 'test': True,
2816 'quiet': self.params.get('quiet') or not verbose,
2817 'verbose': verbose,
2818 'noprogress': not verbose,
2819 'nopart': True,
2820 'skip_unavailable_fragments': False,
2821 'keep_fragments': False,
2822 'overwrites': True,
2823 '_no_ytdl_file': True,
2824 }
2825 else:
2826 params = self.params
2827 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2828 if not test:
2829 for ph in self._progress_hooks:
2830 fd.add_progress_hook(ph)
2831 urls = '", "'.join(
2832 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2833 for f in info.get('requested_formats', []) or [info])
2834 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
2835
2836 # Note: Ideally, info should be deep-copied so that hooks cannot modify it,
2837 # but it may contain objects that are not deep-copyable
2838 new_info = self._copy_infodict(info)
2839 if new_info.get('http_headers') is None:
2840 new_info['http_headers'] = self._calc_headers(new_info)
2841 return fd.download(name, new_info, subtitle)
2842
2843 def existing_file(self, filepaths, *, default_overwrite=True):
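''' Return the first existing file among filepaths if overwriting is disabled;
otherwise delete all existing candidates and return None so a fresh download proceeds '''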
2844 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2845 if existing_files and not self.params.get('overwrites', default_overwrite):
2846 return existing_files[0]
2847
2848 for file in existing_files:
2849 self.report_file_delete(file)
2850 os.remove(file)
2851 return None
2852
2853 def process_info(self, info_dict):
2854 """Process a single resolved IE result. (Modifies it in-place)"""
2855
2856 assert info_dict.get('_type', 'video') == 'video'
2857 original_infodict = info_dict
2858
2859 if 'format' not in info_dict and 'ext' in info_dict:
2860 info_dict['format'] = info_dict['ext']
2861
2862 # This is mostly just for backward compatibility of process_info
2863 # As a side-effect, this allows for format-specific filters
2864 if self._match_entry(info_dict) is not None:
2865 info_dict['__write_download_archive'] = 'ignore'
2866 return
2867
2868 # Does nothing under normal operation - for backward compatibility of process_info
2869 self.post_extract(info_dict)
2870 self._num_downloads += 1
2871
2872 # info_dict['_filename'] needs to be set for backward compatibility
2873 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2874 temp_filename = self.prepare_filename(info_dict, 'temp')
2875 files_to_move = {}
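# Maps temporary filepaths to their final destinations, consumed later
# by MoveFilesAfterDownloadPP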
2876
2877 # Forced printings
2878 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2879
2880 def check_max_downloads():
2881 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
2882 raise MaxDownloadsReached()
2883
2884 if self.params.get('simulate'):
2885 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2886 check_max_downloads()
2887 return
2888
2889 if full_filename is None:
2890 return
2891 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2892 return
2893 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2894 return
2895
2896 if self._write_description('video', info_dict,
2897 self.prepare_filename(info_dict, 'description')) is None:
2898 return
2899
2900 sub_files = self._write_subtitles(info_dict, temp_filename)
2901 if sub_files is None:
2902 return
2903 files_to_move.update(dict(sub_files))
2904
2905 thumb_files = self._write_thumbnails(
2906 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2907 if thumb_files is None:
2908 return
2909 files_to_move.update(dict(thumb_files))
2910
2911 infofn = self.prepare_filename(info_dict, 'infojson')
2912 _infojson_written = self._write_info_json('video', info_dict, infofn)
2913 if _infojson_written:
2914 info_dict['infojson_filename'] = infofn
2915 # For backward compatibility, even though it was a private field
2916 info_dict['__infojson_filename'] = infofn
2917 elif _infojson_written is None:
2918 return
2919
2920 # Note: Annotations are deprecated
2921 annofn = None
2922 if self.params.get('writeannotations', False):
2923 annofn = self.prepare_filename(info_dict, 'annotation')
2924 if annofn:
2925 if not self._ensure_dir_exists(encodeFilename(annofn)):
2926 return
2927 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2928 self.to_screen('[info] Video annotations are already present')
2929 elif not info_dict.get('annotations'):
2930 self.report_warning('There are no annotations to write.')
2931 else:
2932 try:
2933 self.to_screen('[info] Writing video annotations to: ' + annofn)
2934 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2935 annofile.write(info_dict['annotations'])
2936 except (KeyError, TypeError):
2937 self.report_warning('There are no annotations to write.')
2938 except OSError:
2939 self.report_error('Cannot write annotations file: ' + annofn)
2940 return
2941
2942 # Write internet shortcut files
2943 def _write_link_file(link_type):
2944 url = try_get(info_dict['webpage_url'], iri_to_uri)
2945 if not url:
2946 self.report_warning(
2947 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2948 return True
2949 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2950 if not self._ensure_dir_exists(encodeFilename(linkfn)):
2951 return False
2952 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2953 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2954 return True
2955 try:
2956 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2957 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2958 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2959 template_vars = {'url': url}
2960 if link_type == 'desktop':
2961 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2962 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2963 except OSError:
2964 self.report_error(f'Cannot write internet shortcut {linkfn}')
2965 return False
2966 return True
2967
2968 write_links = {
2969 'url': self.params.get('writeurllink'),
2970 'webloc': self.params.get('writewebloclink'),
2971 'desktop': self.params.get('writedesktoplink'),
2972 }
2973 if self.params.get('writelink'):
2974 link_type = ('webloc' if sys.platform == 'darwin'
2975 else 'desktop' if sys.platform.startswith('linux')
2976 else 'url')
2977 write_links[link_type] = True
2978
2979 if any(should_write and not _write_link_file(link_type)
2980 for link_type, should_write in write_links.items()):
2981 return
2982
2983 def replace_info_dict(new_info):
2984 nonlocal info_dict
2985 if new_info == info_dict:
2986 return
2987 info_dict.clear()
2988 info_dict.update(new_info)
2989
2990 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2991 replace_info_dict(new_info)
2992
2993 if self.params.get('skip_download'):
2994 info_dict['filepath'] = temp_filename
2995 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2996 info_dict['__files_to_move'] = files_to_move
2997 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2998 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2999 else:
3000 # Download
3001 info_dict.setdefault('__postprocessors', [])
3002 try:
3003
3004 def existing_video_file(*filepaths):
3005 ext = info_dict.get('ext')
3006 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3007 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3008 default_overwrite=False)
3009 if file:
3010 info_dict['ext'] = os.path.splitext(file)[1][1:]
3011 return file
3012
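# Partial downloads (e.g. --download-sections) are only possible through
# ffmpeg; abort early if another downloader would be picked for this format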
3013 fd, success = None, True
3014 if info_dict.get('protocol') or info_dict.get('url'):
3015 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3016 if fd is not FFmpegFD and (
3017 info_dict.get('section_start') or info_dict.get('section_end')):
3018 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3019 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3020 self.report_error(f'{msg}. Aborting')
3021 return
3022
3023 if info_dict.get('requested_formats') is not None:
3024
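# Multiple formats were requested (e.g. bestvideo+bestaudio); pick a
# container that can hold all of them before handing off to the merger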
3025 def compatible_formats(formats):
3026 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3027 video_formats = [format for format in formats if format.get('vcodec') != 'none']
3028 audio_formats = [format for format in formats if format.get('acodec') != 'none']
3029 if len(video_formats) > 2 or len(audio_formats) > 2:
3030 return False
3031
3032 # Check extension
3033 exts = {format.get('ext') for format in formats}
3034 COMPATIBLE_EXTS = (
3035 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
3036 {'webm'},
3037 )
3038 for ext_sets in COMPATIBLE_EXTS:
3039 if ext_sets.issuperset(exts):
3040 return True
3041 # TODO: Check acodec/vcodec
3042 return False
3043
3044 requested_formats = info_dict['requested_formats']
3045 old_ext = info_dict['ext']
3046 if self.params.get('merge_output_format') is None:
3047 if not compatible_formats(requested_formats):
3048 info_dict['ext'] = 'mkv'
3049 self.report_warning(
3050 'Requested formats are incompatible for merge and will be merged into mkv')
3051 if (info_dict['ext'] == 'webm'
3052 and info_dict.get('thumbnails')
3053 # check with type instead of pp_key, __name__, or isinstance
3054 # since we don't want any custom PPs to trigger this
3055 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
3056 info_dict['ext'] = 'mkv'
3057 self.report_warning(
3058 'webm doesn\'t support embedding a thumbnail; mkv will be used')
3059 new_ext = info_dict['ext']
3060
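# Rewrite the filename extension to the merge container's ext, but only
# strip an existing extension when it is one we produced ourselves
# (old or new ext), so user-supplied names containing dots stay intact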
3061 def correct_ext(filename, ext=new_ext):
3062 if filename == '-':
3063 return filename
3064 filename_real_ext = os.path.splitext(filename)[1][1:]
3065 filename_wo_ext = (
3066 os.path.splitext(filename)[0]
3067 if filename_real_ext in (old_ext, new_ext)
3068 else filename)
3069 return f'{filename_wo_ext}.{ext}'
3070
3071 # Ensure filename always has a correct extension for successful merge
3072 full_filename = correct_ext(full_filename)
3073 temp_filename = correct_ext(temp_filename)
3074 dl_filename = existing_video_file(full_filename, temp_filename)
3075 info_dict['__real_download'] = False
3076
3077 merger = FFmpegMergerPP(self)
3078 downloaded = []
3079 if dl_filename is not None:
3080 self.report_file_already_downloaded(dl_filename)
3081 elif fd:
3082 for f in requested_formats if fd != FFmpegFD else []:
3083 f['filepath'] = fname = prepend_extension(
3084 correct_ext(temp_filename, info_dict['ext']),
3085 'f%s' % f['format_id'], info_dict['ext'])
3086 downloaded.append(fname)
3087 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3088 success, real_download = self.dl(temp_filename, info_dict)
3089 info_dict['__real_download'] = real_download
3090 else:
3091 if self.params.get('allow_unplayable_formats'):
3092 self.report_warning(
3093 'You have requested merging of multiple formats '
3094 'while also allowing unplayable formats to be downloaded. '
3095 'The formats won\'t be merged to prevent data corruption.')
3096 elif not merger.available:
3097 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3098 if not self.params.get('ignoreerrors'):
3099 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3100 return
3101 self.report_warning(f'{msg}. The formats won\'t be merged')
3102
3103 if temp_filename == '-':
3104 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3105 else 'but the formats are incompatible for simultaneous download' if merger.available
3106 else 'but ffmpeg is not installed')
3107 self.report_warning(
3108 f'You have requested downloading multiple formats to stdout {reason}. '
3109 'The formats will be streamed one after the other')
3110 fname = temp_filename
3111 for f in requested_formats:
3112 new_info = dict(info_dict)
3113 del new_info['requested_formats']
3114 new_info.update(f)
3115 if temp_filename != '-':
3116 fname = prepend_extension(
3117 correct_ext(temp_filename, new_info['ext']),
3118 'f%s' % f['format_id'], new_info['ext'])
3119 if not self._ensure_dir_exists(fname):
3120 return
3121 f['filepath'] = fname
3122 downloaded.append(fname)
3123 partial_success, real_download = self.dl(fname, new_info)
3124 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3125 success = success and partial_success
3126
3127 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3128 info_dict['__postprocessors'].append(merger)
3129 info_dict['__files_to_merge'] = downloaded
3130 # Even if nothing new was downloaded, the merge itself is only happening now
3131 info_dict['__real_download'] = True
3132 else:
3133 for file in downloaded:
3134 files_to_move[file] = None
3135 else:
3136 # Just a single file
3137 dl_filename = existing_video_file(full_filename, temp_filename)
3138 if dl_filename is None or dl_filename == temp_filename:
3139 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3140 # So we should try to resume the download
3141 success, real_download = self.dl(temp_filename, info_dict)
3142 info_dict['__real_download'] = real_download
3143 else:
3144 self.report_file_already_downloaded(dl_filename)
3145
3146 dl_filename = dl_filename or temp_filename
3147 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3148
3149 except network_exceptions as err:
3150 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3151 return
3152 except OSError as err:
3153 raise UnavailableVideoError(err)
3154 except (ContentTooShortError, ) as err:
3155 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3156 return
3157
3158 self._raise_pending_errors(info_dict)
3159 if success and full_filename != '-':
3160
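# Post-download fixups, controlled by --fixup:
# 'never'/'ignore' skip entirely, 'warn' only reports the problem,
# 'detect_or_warn' (the default) attaches the appropriate ffmpeg fixup PP
# when something was actually downloaded, and 'force' runs fixups regardless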
3161 def fixup():
3162 do_fixup = True
3163 fixup_policy = self.params.get('fixup')
3164 vid = info_dict['id']
3165
3166 if fixup_policy in ('ignore', 'never'):
3167 return
3168 elif fixup_policy == 'warn':
3169 do_fixup = 'warn'
3170 elif fixup_policy != 'force':
3171 assert fixup_policy in ('detect_or_warn', None)
3172 if not info_dict.get('__real_download'):
3173 do_fixup = False
3174
3175 def ffmpeg_fixup(cndn, msg, cls):
3176 if not (do_fixup and cndn):
3177 return
3178 elif do_fixup == 'warn':
3179 self.report_warning(f'{vid}: {msg}')
3180 return
3181 pp = cls(self)
3182 if pp.available:
3183 info_dict['__postprocessors'].append(pp)
3184 else:
3185 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3186
3187 stretched_ratio = info_dict.get('stretched_ratio')
3188 ffmpeg_fixup(stretched_ratio not in (1, None),
3189 f'Non-uniform pixel ratio {stretched_ratio}',
3190 FFmpegFixupStretchedPP)
3191
3192 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3193 downloader = downloader.FD_NAME if downloader else None
3194
3195 ext = info_dict.get('ext')
3196 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3197 isinstance(pp, FFmpegVideoConvertorPP)
3198 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3199 ) for pp in self._pps['post_process'])
3200
3201 if not postprocessed_by_ffmpeg:
3202 ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
3203 'writing DASH m4a. Only some players support this container',
3204 FFmpegFixupM4aPP)
3205 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3206 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3207 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3208 FFmpegFixupM3u8PP)
3209 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3210 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3211
3212 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3213 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3214
3215 fixup()
3216 try:
3217 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3218 except PostProcessingError as err:
3219 self.report_error('Postprocessing: %s' % str(err))
3220 return
3221 try:
3222 for ph in self._post_hooks:
3223 ph(info_dict['filepath'])
3224 except Exception as err:
3225 self.report_error('post hooks: %s' % str(err))
3226 return
3227 info_dict['__write_download_archive'] = True
3228
3229 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
3230 if self.params.get('force_write_download_archive'):
3231 info_dict['__write_download_archive'] = True
3232 check_max_downloads()
3233
3234 def __download_wrapper(self, func):
3235 @functools.wraps(func)
3236 def wrapper(*args, **kwargs):
3237 try:
3238 res = func(*args, **kwargs)
3239 except UnavailableVideoError as e:
3240 self.report_error(e)
3241 except DownloadCancelled as e:
3242 self.to_screen(f'[info] {e}')
3243 if not self.params.get('break_per_url'):
3244 raise
3245 else:
3246 if self.params.get('dump_single_json', False):
3247 self.post_extract(res)
3248 self.to_stdout(json.dumps(self.sanitize_info(res)))
3249 return wrapper
3250
3251 def download(self, url_list):
3252 """Download a given list of URLs."""
3253 url_list = variadic(url_list) # Passing a single URL is a common mistake
3254 outtmpl = self.params['outtmpl']['default']
3255 if (len(url_list) > 1
3256 and outtmpl != '-'
3257 and '%' not in outtmpl
3258 and self.params.get('max_downloads') != 1):
3259 raise SameFileError(outtmpl)
3260
3261 for url in url_list:
3262 self.__download_wrapper(self.extract_info)(
3263 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3264
3265 return self._download_retcode
3266
3267 def download_with_info_file(self, info_filename):
3268 with contextlib.closing(fileinput.FileInput(
3269 [info_filename], mode='r',
3270 openhook=fileinput.hook_encoded('utf-8'))) as f:
3271 # FileInput doesn't have a read method, so we can't call json.load
3272 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3273 try:
3274 self.__download_wrapper(self.process_ie_result)(info, download=True)
3275 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3276 if not isinstance(e, EntryNotInPlaylist):
3277 self.to_stderr('\r')
3278 webpage_url = info.get('webpage_url')
3279 if webpage_url is not None:
3280 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3281 return self.download([webpage_url])
3282 else:
3283 raise
3284 return self._download_retcode
3285
3286 @staticmethod
3287 def sanitize_info(info_dict, remove_private_keys=False):
3288 ''' Sanitize the infodict for converting to json '''
3289 if info_dict is None:
3290 return info_dict
3291 info_dict.setdefault('epoch', int(time.time()))
3292 info_dict.setdefault('_type', 'video')
3293
3294 if remove_private_keys:
3295 reject = lambda k, v: v is None or k.startswith('__') or k in {
3296 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3297 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
3298 }
3299 else:
3300 reject = lambda k, v: False
3301
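# Recursively coerce the infodict into JSON-serializable primitives;
# anything unknown (e.g. extractor objects) is stringified via repr()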
3302 def filter_fn(obj):
3303 if isinstance(obj, dict):
3304 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3305 elif isinstance(obj, (list, tuple, set, LazyList)):
3306 return list(map(filter_fn, obj))
3307 elif obj is None or isinstance(obj, (str, int, float, bool)):
3308 return obj
3309 else:
3310 return repr(obj)
3311
3312 return filter_fn(info_dict)
3313
3314 @staticmethod
3315 def filter_requested_info(info_dict, actually_filter=True):
3316 ''' Alias of sanitize_info for backward compatibility '''
3317 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3318
3319 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3320 for filename in set(filter(None, files_to_delete)):
3321 if msg:
3322 self.to_screen(msg % filename)
3323 try:
3324 os.remove(filename)
3325 except OSError:
3326 self.report_warning(f'Unable to delete file {filename}')
3327 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3328 del info['__files_to_move'][filename]
3329
3330 @staticmethod
3331 def post_extract(info_dict):
3332 def actual_post_extract(info_dict):
3333 if info_dict.get('_type') in ('playlist', 'multi_video'):
3334 for video_dict in info_dict.get('entries', {}):
3335 actual_post_extract(video_dict or {})
3336 return
3337
3338 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3339 info_dict.update(post_extractor())
3340
3341 actual_post_extract(info_dict or {})
3342
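# Run a single postprocessor; PPs return (files_to_delete, infodict).
# With --keep-video, files marked for deletion are kept and merely
# recorded in __files_to_move instead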
3343 def run_pp(self, pp, infodict):
3344 files_to_delete = []
3345 if '__files_to_move' not in infodict:
3346 infodict['__files_to_move'] = {}
3347 try:
3348 files_to_delete, infodict = pp.run(infodict)
3349 except PostProcessingError as e:
3350 # Must be True and not 'only_download'
3351 if self.params.get('ignoreerrors') is True:
3352 self.report_error(e)
3353 return infodict
3354 raise
3355
3356 if not files_to_delete:
3357 return infodict
3358 if self.params.get('keepvideo', False):
3359 for f in files_to_delete:
3360 infodict['__files_to_move'].setdefault(f, '')
3361 else:
3362 self._delete_downloaded_files(
3363 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3364 return infodict
3365
3366 def run_all_pps(self, key, info, *, additional_pps=None):
3367 self._forceprint(key, info)
3368 for pp in (additional_pps or []) + self._pps[key]:
3369 info = self.run_pp(pp, info)
3370 return info
3371
3372 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3373 info = dict(ie_info)
3374 info['__files_to_move'] = files_to_move or {}
3375 try:
3376 info = self.run_all_pps(key, info)
3377 except PostProcessingError as err:
3378 msg = f'Preprocessing: {err}'
3379 info.setdefault('__pending_error', msg)
3380 self.report_error(msg, is_error=False)
3381 return info, info.pop('__files_to_move', None)
3382
3383 def post_process(self, filename, info, files_to_move=None):
3384 """Run all the postprocessors on the given file."""
3385 info['filepath'] = filename
3386 info['__files_to_move'] = files_to_move or {}
3387 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3388 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3389 del info['__files_to_move']
3390 return self.run_all_pps('after_move', info)
3391
3392 def _make_archive_id(self, info_dict):
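''' Return the download-archive key: the lower-cased extractor key followed
by the video id, e.g. 'youtube dQw4w9WgXcQ' '''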
3393 video_id = info_dict.get('id')
3394 if not video_id:
3395 return
3396 # The extractor key is lower-cased to future-proof against changes in case
3397 # and for backwards compatibility with prior versions
3398 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3399 if extractor is None:
3400 url = str_or_none(info_dict.get('url'))
3401 if not url:
3402 return
3403 # Try to find matching extractor for the URL and take its ie_key
3404 for ie_key, ie in self._ies.items():
3405 if ie.suitable(url):
3406 extractor = ie_key
3407 break
3408 else:
3409 return
3410 return f'{extractor.lower()} {video_id}'
3411
3412 def in_download_archive(self, info_dict):
3413 fn = self.params.get('download_archive')
3414 if fn is None:
3415 return False
3416
3417 vid_id = self._make_archive_id(info_dict)
3418 if not vid_id:
3419 return False # Incomplete video information
3420
3421 return vid_id in self.archive
3422
3423 def record_download_archive(self, info_dict):
3424 fn = self.params.get('download_archive')
3425 if fn is None:
3426 return
3427 vid_id = self._make_archive_id(info_dict)
3428 assert vid_id
3429 self.write_debug(f'Adding to archive: {vid_id}')
3430 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3431 archive_file.write(vid_id + '\n')
3432 self.archive.add(vid_id)
3433
3434 @staticmethod
3435 def format_resolution(format, default='unknown'):
3436 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3437 return 'audio only'
3438 if format.get('resolution') is not None:
3439 return format['resolution']
3440 if format.get('width') and format.get('height'):
3441 return '%dx%d' % (format['width'], format['height'])
3442 elif format.get('height'):
3443 return '%sp' % format['height']
3444 elif format.get('width'):
3445 return '%dx?' % format['width']
3446 return default
3447
3448 def _list_format_headers(self, *headers):
3449 if self.params.get('listformats_table', True) is not False:
3450 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3451 return headers
3452
3453 def _format_note(self, fdict):
3454 res = ''
3455 if fdict.get('ext') in ['f4f', 'f4m']:
3456 res += '(unsupported)'
3457 if fdict.get('language'):
3458 if res:
3459 res += ' '
3460 res += '[%s]' % fdict['language']
3461 if fdict.get('format_note') is not None:
3462 if res:
3463 res += ' '
3464 res += fdict['format_note']
3465 if fdict.get('tbr') is not None:
3466 if res:
3467 res += ', '
3468 res += '%4dk' % fdict['tbr']
3469 if fdict.get('container') is not None:
3470 if res:
3471 res += ', '
3472 res += '%s container' % fdict['container']
3473 if (fdict.get('vcodec') is not None
3474 and fdict.get('vcodec') != 'none'):
3475 if res:
3476 res += ', '
3477 res += fdict['vcodec']
3478 if fdict.get('vbr') is not None:
3479 res += '@'
3480 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3481 res += 'video@'
3482 if fdict.get('vbr') is not None:
3483 res += '%4dk' % fdict['vbr']
3484 if fdict.get('fps') is not None:
3485 if res:
3486 res += ', '
3487 res += '%sfps' % fdict['fps']
3488 if fdict.get('acodec') is not None:
3489 if res:
3490 res += ', '
3491 if fdict['acodec'] == 'none':
3492 res += 'video only'
3493 else:
3494 res += '%-5s' % fdict['acodec']
3495 elif fdict.get('abr') is not None:
3496 if res:
3497 res += ', '
3498 res += 'audio'
3499 if fdict.get('abr') is not None:
3500 res += '@%3dk' % fdict['abr']
3501 if fdict.get('asr') is not None:
3502 res += ' (%5dHz)' % fdict['asr']
3503 if fdict.get('filesize') is not None:
3504 if res:
3505 res += ', '
3506 res += format_bytes(fdict['filesize'])
3507 elif fdict.get('filesize_approx') is not None:
3508 if res:
3509 res += ', '
3510 res += '~' + format_bytes(fdict['filesize_approx'])
3511 return res
3512
3513 def render_formats_table(self, info_dict):
3514 if not info_dict.get('formats') and not info_dict.get('url'):
3515 return None
3516
3517 formats = info_dict.get('formats', [info_dict])
3518 if self.params.get('listformats_table', True) is False:
3519 table = [
3520 [
3521 format_field(f, 'format_id'),
3522 format_field(f, 'ext'),
3523 self.format_resolution(f),
3524 self._format_note(f)
3525 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3526 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3527
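# Trim codec strings to at most 4 dot-separated parts (e.g. avc1.640028);
# storyboard-only formats are shown as 'images', and the missing half of
# an audio- or video-only format is rendered dimmed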
3528 def simplified_codec(f, field):
3529 assert field in ('acodec', 'vcodec')
3530 codec = f.get(field, 'unknown')
3531 if codec != 'none':
3532 return '.'.join(codec.split('.')[:4])
3533
3534 if field == 'vcodec' and f.get('acodec') == 'none':
3535 return 'images'
3536 elif field == 'acodec' and f.get('vcodec') == 'none':
3537 return ''
3538 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3539 self.Styles.SUPPRESS)
3540
3541 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3542 table = [
3543 [
3544 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3545 format_field(f, 'ext'),
3546 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3547 format_field(f, 'fps', '\t%d', func=round),
3548 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3549 delim,
3550 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3551 format_field(f, 'tbr', '\t%dk', func=round),
3552 shorten_protocol_name(f.get('protocol', '')),
3553 delim,
3554 simplified_codec(f, 'vcodec'),
3555 format_field(f, 'vbr', '\t%dk', func=round),
3556 simplified_codec(f, 'acodec'),
3557 format_field(f, 'abr', '\t%dk', func=round),
3558 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3559 join_nonempty(
3560 self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3561 format_field(f, 'language', '[%s]'),
3562 join_nonempty(format_field(f, 'format_note'),
3563 format_field(f, 'container', ignore=(None, f.get('ext'))),
3564 delim=', '),
3565 delim=' '),
3566 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3567 header_line = self._list_format_headers(
3568 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3569 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3570
3571 return render_table(
3572 header_line, table, hide_empty=True,
3573 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3574
3575 def render_thumbnails_table(self, info_dict):
3576 thumbnails = list(info_dict.get('thumbnails') or [])
3577 if not thumbnails:
3578 return None
3579 return render_table(
3580 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3581 [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3582
3583 def render_subtitles_table(self, video_id, subtitles):
3584 def _row(lang, formats):
3585 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3586 if len(set(names)) == 1:
3587 names = [] if names[0] == 'unknown' else names[:1]
3588 return [lang, ', '.join(names), ', '.join(exts)]
3589
3590 if not subtitles:
3591 return None
3592 return render_table(
3593 self._list_format_headers('Language', 'Name', 'Formats'),
3594 [_row(lang, formats) for lang, formats in subtitles.items()],
3595 hide_empty=True)
3596
3597 def __list_table(self, video_id, name, func, *args):
3598 table = func(*args)
3599 if not table:
3600 self.to_screen(f'{video_id} has no {name}')
3601 return
3602 self.to_screen(f'[info] Available {name} for {video_id}:')
3603 self.to_stdout(table)
3604
3605 def list_formats(self, info_dict):
3606 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3607
3608 def list_thumbnails(self, info_dict):
3609 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3610
3611 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3612 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3613
3614 def urlopen(self, req):
3615 """ Start an HTTP download """
3616 if isinstance(req, str):
3617 req = sanitized_Request(req)
3618 return self._opener.open(req, timeout=self._socket_timeout)
3619
3620 def print_debug_header(self):
3621 if not self.params.get('verbose'):
3622 return
3623
3624 # These imports can be slow, so import them only as needed
3625 from .extractor.extractors import _LAZY_LOADER
3626 from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
3627
3628 def get_encoding(stream):
3629 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3630 if not supports_terminal_sequences(stream):
3631 from .utils import WINDOWS_VT_MODE # Must be imported locally
3632 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3633 return ret
3634
3635 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
3636 locale.getpreferredencoding(),
3637 sys.getfilesystemencoding(),
3638 self.get_encoding(),
3639 ', '.join(
3640 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3641 if stream is not None and key != 'console')
3642 )
3643
3644 logger = self.params.get('logger')
3645 if logger:
3646 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3647 write_debug(encoding_str)
3648 else:
3649 write_string(f'[debug] {encoding_str}\n', encoding=None)
3650 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3651
3652 source = detect_variant()
3653 write_debug(join_nonempty(
3654 'yt-dlp version', __version__,
3655 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3656 '' if source == 'unknown' else f'({source})',
3657 delim=' '))
3658 if not _LAZY_LOADER:
3659 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3660 write_debug('Lazy loading extractors is forcibly disabled')
3661 else:
3662 write_debug('Lazy loading extractors is disabled')
3663 if plugin_extractors or plugin_postprocessors:
3664 write_debug('Plugins: %s' % [
3665 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3666 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3667 if self.params['compat_opts']:
3668 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
3669
3670 if source == 'source':
3671 try:
3672 stdout, _, _ = Popen.run(
3673 ['git', 'rev-parse', '--short', 'HEAD'],
3674 text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
3675 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
3676 if re.fullmatch('[0-9a-f]+', stdout.strip()):
3677 write_debug(f'Git HEAD: {stdout.strip()}')
3678 except Exception:
3679 with contextlib.suppress(Exception):
3680 sys.exc_clear()
3681
3682 write_debug(system_identifier())
3683
3684 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3685 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3686 if ffmpeg_features:
3687 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3688
3689 exe_versions['rtmpdump'] = rtmpdump_version()
3690 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3691 exe_str = ', '.join(
3692 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3693 ) or 'none'
3694 write_debug('exe versions: %s' % exe_str)
3695
3696 from .compat.compat_utils import get_package_info
3697 from .dependencies import available_dependencies
3698
3699 write_debug('Optional libraries: %s' % (', '.join(sorted({
3700 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
3701 })) or 'none'))
3702
3703 self._setup_opener()
3704 proxy_map = {}
3705 for handler in self._opener.handlers:
3706 if hasattr(handler, 'proxies'):
3707 proxy_map.update(handler.proxies)
3708 write_debug(f'Proxy map: {proxy_map}')
3709
3710 # Not implemented
3711 if False and self.params.get('call_home'):
3712 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
3713 write_debug('Public IP address: %s' % ipaddr)
3714 latest_version = self.urlopen(
3715 'https://yt-dl.org/latest/version').read().decode()
3716 if version_tuple(latest_version) > version_tuple(__version__):
3717 self.report_warning(
3718 'You are using an outdated version (newest version: %s)! '
3719 'See https://yt-dl.org/update if you need help updating.' %
3720 latest_version)
3721
3722 def _setup_opener(self):
3723 if hasattr(self, '_opener'):
3724 return
3725 timeout_val = self.params.get('socket_timeout')
3726 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3727
3728 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3729 opts_cookiefile = self.params.get('cookiefile')
3730 opts_proxy = self.params.get('proxy')
3731
3732 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3733
3734 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
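# An explicit --proxy (even an empty string, meaning a direct connection)
# takes precedence over proxies picked up from the environment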
3735 if opts_proxy is not None:
3736 if opts_proxy == '':
3737 proxies = {}
3738 else:
3739 proxies = {'http': opts_proxy, 'https': opts_proxy}
3740 else:
3741 proxies = urllib.request.getproxies()
3742 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3743 if 'http' in proxies and 'https' not in proxies:
3744 proxies['https'] = proxies['http']
3745 proxy_handler = PerRequestProxyHandler(proxies)
3746
3747 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3748 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3749 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3750 redirect_handler = YoutubeDLRedirectHandler()
3751 data_handler = urllib.request.DataHandler()
3752
3753 # When passing our own FileHandler instance, build_opener won't add the
3754 # default FileHandler and allows us to disable the file protocol, which
3755 # can be used for malicious purposes (see
3756 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3757 file_handler = urllib.request.FileHandler()
3758
3759 def file_open(*args, **kwargs):
3760 raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3761 file_handler.file_open = file_open
3762
3763 opener = urllib.request.build_opener(
3764 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3765
3766 # Delete the default user-agent header, which would otherwise apply in
3767 # cases where our custom HTTP handler doesn't come into play
3768 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3769 opener.addheaders = []
3770 self._opener = opener
3771
3772 def encode(self, s):
3773 if isinstance(s, bytes):
3774 return s # Already encoded
3775
3776 try:
3777 return s.encode(self.get_encoding())
3778 except UnicodeEncodeError as err:
3779 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3780 raise
3781
3782 def get_encoding(self):
3783 encoding = self.params.get('encoding')
3784 if encoding is None:
3785 encoding = preferredencoding()
3786 return encoding
3787
3788 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3789 ''' Writes infojson and returns True = written, 'exists' = already exists, False = skipped, None = error '''
3790 if overwrite is None:
3791 overwrite = self.params.get('overwrites', True)
3792 if not self.params.get('writeinfojson'):
3793 return False
3794 elif not infofn:
3795 self.write_debug(f'Skipping writing {label} infojson')
3796 return False
3797 elif not self._ensure_dir_exists(infofn):
3798 return None
3799 elif not overwrite and os.path.exists(infofn):
3800 self.to_screen(f'[info] {label.title()} metadata is already present')
3801 return 'exists'
3802
3803 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3804 try:
3805 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3806 return True
3807 except OSError:
3808 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3809 return None
3810
3811 def _write_description(self, label, ie_result, descfn):
3812 ''' Writes description and returns True = written, False = skipped, None = error '''
3813 if not self.params.get('writedescription'):
3814 return False
3815 elif not descfn:
3816 self.write_debug(f'Skipping writing {label} description')
3817 return False
3818 elif not self._ensure_dir_exists(descfn):
3819 return None
3820 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3821 self.to_screen(f'[info] {label.title()} description is already present')
3822 elif ie_result.get('description') is None:
3823 self.report_warning(f'There\'s no {label} description to write')
3824 return False
3825 else:
3826 try:
3827 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3828 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3829 descfile.write(ie_result['description'])
3830 except OSError:
3831 self.report_error(f'Cannot write {label} description file {descfn}')
3832 return None
3833 return True
3834
3835 def _write_subtitles(self, info_dict, filename):
3836 ''' Writes subtitles to file and returns a list of (sub_filename, final_sub_filename); or None on error '''
3837 ret = []
3838 subtitles = info_dict.get('requested_subtitles')
3839 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3840 # Subtitle download errors are already handled in the relevant IE,
3841 # so this silently continues when used with an IE that doesn't support subtitles
3842 return ret
3843
3844 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3845 if not sub_filename_base:
3846 self.to_screen('[info] Skipping writing video subtitles')
3847 return ret
3848 for sub_lang, sub_info in subtitles.items():
3849 sub_format = sub_info['ext']
3850 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3851 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3852 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3853 if existing_sub:
3854 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3855 sub_info['filepath'] = existing_sub
3856 ret.append((existing_sub, sub_filename_final))
3857 continue
3858
3859 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3860 if sub_info.get('data') is not None:
3861 try:
3862 # Use newline='' to prevent conversion of newline characters
3863 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3864 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3865 subfile.write(sub_info['data'])
3866 sub_info['filepath'] = sub_filename
3867 ret.append((sub_filename, sub_filename_final))
3868 continue
3869 except OSError:
3870 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3871 return None
3872
3873 try:
3874 sub_copy = sub_info.copy()
3875 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3876 self.dl(sub_filename, sub_copy, subtitle=True)
3877 sub_info['filepath'] = sub_filename
3878 ret.append((sub_filename, sub_filename_final))
3879 except (DownloadError, ExtractorError, OSError, ValueError) + network_exceptions as err:
3880 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
3881 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
3882 if not self.params.get('ignoreerrors'):
3883 self.report_error(msg)
3884 raise DownloadError(msg)
3885 self.report_warning(msg)
3886 return ret
3887
3888 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3889 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3890 write_all = self.params.get('write_all_thumbnails', False)
3891 thumbnails, ret = [], []
3892 if write_all or self.params.get('writethumbnail', False):
3893 thumbnails = info_dict.get('thumbnails') or []
3894 multiple = write_all and len(thumbnails) > 1
3895
3896 if thumb_filename_base is None:
3897 thumb_filename_base = filename
3898 if thumbnails and not thumb_filename_base:
3899 self.write_debug(f'Skipping writing {label} thumbnail')
3900 return ret
3901
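# The thumbnails list is ordered worst-to-best, so iterate in reverse to try
# the most preferred one first; unless --write-all-thumbnails was given,
# stop after the first successful write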
3902 for idx, t in list(enumerate(thumbnails))[::-1]:
3903 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3904 thumb_display_id = f'{label} thumbnail {t["id"]}'
3905 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3906 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3907
3908 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3909 if existing_thumb:
3910 self.to_screen('[info] %s is already present' % (
3911 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3912 t['filepath'] = existing_thumb
3913 ret.append((existing_thumb, thumb_filename_final))
3914 else:
3915 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3916 try:
3917 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3918 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3919 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3920 shutil.copyfileobj(uf, thumbf)
3921 ret.append((thumb_filename, thumb_filename_final))
3922 t['filepath'] = thumb_filename
3923 except network_exceptions as err:
3924 thumbnails.pop(idx)
3925 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3926 if ret and not write_all:
3927 break
3928 return ret