yt_dlp/YoutubeDL.py

   1 import collections
   2 import contextlib
   3 import datetime
   4 import errno
   5 import fileinput
   6 import functools
   7 import io
   8 import itertools
   9 import json
  10 import locale
  11 import operator
  12 import os
  13 import random
  14 import re
  15 import shutil
  16 import subprocess
  17 import sys
  18 import tempfile
  19 import time
  20 import tokenize
  21 import traceback
  22 import unicodedata
  23 import urllib.request
  24 from string import ascii_letters
  25
  26 from .cache import Cache
  27 from .compat import HAS_LEGACY as compat_has_legacy
  28 from .compat import compat_os_name, compat_shlex_quote
  29 from .cookies import load_cookies
  30 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
  31 from .downloader.rtmp import rtmpdump_version
  32 from .extractor import gen_extractor_classes, get_info_extractor
  33 from .extractor.openload import PhantomJSwrapper
  34 from .minicurses import format_text
  35 from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
  36 from .postprocessor import (
  37     EmbedThumbnailPP,
  38     FFmpegFixupDuplicateMoovPP,
  39     FFmpegFixupDurationPP,
  40     FFmpegFixupM3u8PP,
  41     FFmpegFixupM4aPP,
  42     FFmpegFixupStretchedPP,
  43     FFmpegFixupTimestampPP,
  44     FFmpegMergerPP,
  45     FFmpegPostProcessor,
  46     FFmpegVideoConvertorPP,
  47     MoveFilesAfterDownloadPP,
  48     get_postprocessor,
  49 )
  50 from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
  51 from .update import detect_variant
  52 from .utils import (
  53     DEFAULT_OUTTMPL,
  54     IDENTITY,
  55     LINK_TEMPLATES,
  56     NO_DEFAULT,
  57     NUMBER_RE,
  58     OUTTMPL_TYPES,
  59     POSTPROCESS_WHEN,
  60     STR_FORMAT_RE_TMPL,
  61     STR_FORMAT_TYPES,
  62     ContentTooShortError,
  63     DateRange,
  64     DownloadCancelled,
  65     DownloadError,
  66     EntryNotInPlaylist,
  67     ExistingVideoReached,
  68     ExtractorError,
  69     GeoRestrictedError,
  70     HEADRequest,
  71     ISO3166Utils,
  72     LazyList,
  73     MaxDownloadsReached,
  74     Namespace,
  75     PagedList,
  76     PerRequestProxyHandler,
  77     PlaylistEntries,
  78     Popen,
  79     PostProcessingError,
  80     ReExtractInfo,
  81     RejectedVideoReached,
  82     SameFileError,
  83     UnavailableVideoError,
  84     YoutubeDLCookieProcessor,
  85     YoutubeDLHandler,
  86     YoutubeDLRedirectHandler,
  87     age_restricted,
  88     args_to_str,
  89     date_from_str,
  90     determine_ext,
  91     determine_protocol,
  92     encode_compat_str,
  93     encodeFilename,
  94     error_to_compat_str,
  95     escapeHTML,
  96     expand_path,
  97     filter_dict,
  98     float_or_none,
  99     format_bytes,
 100     format_decimal_suffix,
 101     format_field,
 102     formatSeconds,
 103     get_domain,
 104     int_or_none,
 105     iri_to_uri,
 106     join_nonempty,
 107     locked_file,
 108     make_dir,
 109     make_HTTPS_handler,
 110     merge_headers,
 111     network_exceptions,
 112     number_of_digits,
 113     orderedSet,
 114     parse_filesize,
 115     preferredencoding,
 116     prepend_extension,
 117     register_socks_protocols,
 118     remove_terminal_sequences,
 119     render_table,
 120     replace_extension,
 121     sanitize_filename,
 122     sanitize_path,
 123     sanitize_url,
 124     sanitized_Request,
 125     std_headers,
 126     str_or_none,
 127     strftime_or_none,
 128     subtitles_filename,
 129     supports_terminal_sequences,
 130     system_identifier,
 131     timetuple_from_msec,
 132     to_high_limit_path,
 133     traverse_obj,
 134     try_get,
 135     url_basename,
 136     variadic,
 137     version_tuple,
 138     windows_enable_vt_mode,
 139     write_json_file,
 140     write_string,
 141 )
 142 from .version import RELEASE_GIT_HEAD, __version__
 143
 144 if compat_os_name == 'nt':
 145     import ctypes
 146
 147
 148 class YoutubeDL:
 149     """YoutubeDL class.
 150
 151     YoutubeDL objects are the ones responsible for downloading the
 152     actual video file and writing it to disk if the user has requested
 153     it, among some other tasks. In most cases there should be one per
 154     program. As, given a video URL, the downloader doesn't know how to
 155     extract all the needed information, task that InfoExtractors do, it
 156     has to pass the URL to one of them.
 157
 158     For this, YoutubeDL objects have a method that allows
 159     InfoExtractors to be registered in a given order. When it is passed
 160     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 161     finds that reports being able to handle it. The InfoExtractor extracts
 162     all the information about the video or videos the URL refers to, and
 163     YoutubeDL processes the extracted information, possibly using a File
 164     Downloader to download the video.
 165
 166     YoutubeDL objects accept a lot of parameters. In order not to saturate
 167     the object constructor with arguments, it receives a dictionary of
 168     options instead. These options are available through the params
 169     attribute for the InfoExtractors to use. The YoutubeDL also
 170     registers itself as the downloader in charge for the InfoExtractors
 171     that are added to it, so this is a "mutual registration".
 172
 173     Available options:
 174
 175     username:          Username for authentication purposes.
 176     password:          Password for authentication purposes.
 177     videopassword:     Password for accessing a video.
 178     ap_mso:            Adobe Pass multiple-system operator identifier.
 179     ap_username:       Multiple-system operator account username.
 180     ap_password:       Multiple-system operator account password.
 181     usenetrc:          Use netrc for authentication instead.
 182     verbose:           Print additional info to stdout.
 183     quiet:             Do not print messages to stdout.
 184     no_warnings:       Do not print out anything for warnings.
 185     forceprint:        A dict with keys WHEN mapped to a list of templates to
 186                        print to stdout. The allowed keys are video or any of the
 187                        items in utils.POSTPROCESS_WHEN.
 188                        For compatibility, a single list is also accepted
 189     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 190                        a list of tuples with (template, filename)
 191     forcejson:         Force printing info_dict as JSON.
 192     dump_single_json:  Force printing the info_dict of the whole playlist
 193                        (or video) as a single JSON line.
 194     force_write_download_archive: Force writing download archive regardless
 195                        of 'skip_download' or 'simulate'.
 196     simulate:          Do not download the video files. If unset (or None),
 197                        simulate only if listsubtitles, listformats or list_thumbnails is used
 198     format:            Video format code. see "FORMAT SELECTION" for more details.
 199                        You can also pass a function. The function takes 'ctx' as
 200                        argument and returns the formats to download.
 201                        See "build_format_selector" for an implementation
 202     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 203     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 204                        extracting metadata even if the video is not actually
 205                        available for download (experimental)
 206     format_sort:       A list of fields by which to sort the video formats.
 207                        See "Sorting Formats" for more details.
 208     format_sort_force: Force the given format_sort. see "Sorting Formats"
 209                        for more details.
 210     prefer_free_formats: Whether to prefer video formats with free containers
 211                        over non-free ones of same quality.
 212     allow_multiple_video_streams:   Allow multiple video streams to be merged
 213                        into a single file
 214     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 215                        into a single file
 216     check_formats:     Whether to test if the formats are downloadable.
 217                        Can be True (check all), False (check none),
 218                        'selected' (check selected formats),
 219                        or None (check only if requested by extractor)
 220     paths:             Dictionary of output paths. The allowed keys are 'home'
 221                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 222     outtmpl:           Dictionary of templates for output names. Allowed keys
 223                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 224                        For compatibility with youtube-dl, a single string can also be used
 225     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 226     restrictfilenames: Do not allow "&" and spaces in file names
 227     trim_file_name:    Limit length of filename (extension excluded)
 228     windowsfilenames:  Force the filenames to be windows compatible
 229     ignoreerrors:      Do not stop on download/postprocessing errors.
 230                        Can be 'only_download' to ignore only download errors.
 231                        Default is 'only_download' for CLI, but False for API
 232     skip_playlist_after_errors: Number of allowed failures until the rest of
 233                        the playlist is skipped
 234     force_generic_extractor: Force downloader to use the generic extractor
 235     overwrites:        Overwrite all video and metadata files if True,
 236                        overwrite only non-video files if None
 237                        and don't overwrite any file if False
 238                        For compatibility with youtube-dl,
 239                        "nooverwrites" may also be used instead
 240     playlist_items:    Specific indices of playlist to download.
 241     playlistrandom:    Download playlist items in random order.
 242     lazy_playlist:     Process playlist entries as they are received.
 243     matchtitle:        Download only matching titles.
 244     rejecttitle:       Reject downloads for matching titles.
 245     logger:            Log messages to a logging.Logger instance.
 246     logtostderr:       Log messages to stderr instead of stdout.
 247     consoletitle:      Display progress in console window's titlebar.
 248     writedescription:  Write the video description to a .description file
 249     writeinfojson:     Write the video metadata to a .info.json file
 250     clean_infojson:    Remove private fields from the infojson
 251     getcomments:       Extract video comments. This will not be written to disk
 252                        unless writeinfojson is also given
 253     writeannotations:  Write the video annotations to a .annotations.xml file
 254     writethumbnail:    Write the thumbnail image to a file
 255     allow_playlist_files: Whether to write playlists' description, infojson etc
 256                        also to disk when using the 'write*' options
 257     write_all_thumbnails:  Write all thumbnail formats to files
 258     writelink:         Write an internet shortcut file, depending on the
 259                        current platform (.url/.webloc/.desktop)
 260     writeurllink:      Write a Windows internet shortcut file (.url)
 261     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 262     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 263     writesubtitles:    Write the video subtitles to a file
 264     writeautomaticsub: Write the automatically generated subtitles to a file
 265     listsubtitles:     Lists all available subtitles for the video
 266     subtitlesformat:   The format code for subtitles
 267     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 268                        The list may contain "all" to refer to all the available
 269                        subtitles. The language can be prefixed with a "-" to
 270                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 271     keepvideo:         Keep the video file after post-processing
 272     daterange:         A DateRange object, download only if the upload_date is in the range.
 273     skip_download:     Skip the actual download of the video file
 274     cachedir:          Location of the cache files in the filesystem.
 275                        False to disable filesystem cache.
 276     noplaylist:        Download single video instead of a playlist if in doubt.
 277     age_limit:         An integer representing the user's age in years.
 278                        Unsuitable videos for the given age are skipped.
 279     min_views:         An integer representing the minimum view count the video
 280                        must have in order to not be skipped.
 281                        Videos without view count information are always
 282                        downloaded. None for no limit.
 283     max_views:         An integer representing the maximum view count.
 284                        Videos that are more popular than that are not
 285                        downloaded.
 286                        Videos without view count information are always
 287                        downloaded. None for no limit.
 288     download_archive:  File name of a file where all downloads are recorded.
 289                        Videos already present in the file are not downloaded
 290                        again.
 291     break_on_existing: Stop the download process after attempting to download a
 292                        file that is in the archive.
 293     break_on_reject:   Stop the download process when encountering a video that
 294                        has been filtered out.
 295     break_per_url:     Whether break_on_reject and break_on_existing
 296                        should act on each input URL as opposed to for the entire queue
 297     cookiefile:        File name or text stream from where cookies should be read and dumped to
 298     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 299                        name/path from where cookies are loaded, and the name of the
 300                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 301     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 302                        support RFC 5746 secure renegotiation
 303     nocheckcertificate:  Do not verify SSL certificates
 304     client_certificate:  Path to client certificate file in PEM format. May include the private key
 305     client_certificate_key:  Path to private key file for client certificate
 306     client_certificate_password:  Password for client certificate private key, if encrypted.
 307                         If not provided and the key is encrypted, yt-dlp will ask interactively
 308     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 309                        At the moment, this is only supported by YouTube.
 310     http_headers:      A dictionary of custom headers to be used for all requests
 311     proxy:             URL of the proxy server to use
 312     geo_verification_proxy:  URL of the proxy to use for IP address verification
 313                        on geo-restricted sites.
 314     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 315     bidi_workaround:   Work around buggy terminals without bidirectional text
 316                        support, using fribidi
 317     debug_printtraffic:Print out sent and received HTTP traffic
 318     default_search:    Prepend this string if an input url is not valid.
 319                        'auto' for elaborate guessing
 320     encoding:          Use this encoding instead of the system-specified.
 321     extract_flat:      Do not resolve URLs, return the immediate result.
 322                        Pass in 'in_playlist' to only show this behavior for
 323                        playlist items.
 324     wait_for_video:    If given, wait for scheduled streams to become available.
 325                        The value should be a tuple containing the range
 326                        (min_secs, max_secs) to wait between retries
 327     postprocessors:    A list of dictionaries, each with an entry
 328                        * key:  The name of the postprocessor. See
 329                                yt_dlp/postprocessor/__init__.py for a list.
 330                        * when: When to run the postprocessor. Allowed values are
 331                                the entries of utils.POSTPROCESS_WHEN
 332                                Assumed to be 'post_process' if not given
 333     progress_hooks:    A list of functions that get called on download
 334                        progress, with a dictionary with the entries
 335                        * status: One of "downloading", "error", or "finished".
 336                                  Check this first and ignore unknown values.
 337                        * info_dict: The extracted info_dict
 338
 339                        If status is one of "downloading", or "finished", the
 340                        following properties may also be present:
 341                        * filename: The final filename (always present)
 342                        * tmpfilename: The filename we're currently writing to
 343                        * downloaded_bytes: Bytes on disk
 344                        * total_bytes: Size of the whole file, None if unknown
 345                        * total_bytes_estimate: Guess of the eventual file size,
 346                                                None if unavailable.
 347                        * elapsed: The number of seconds since download started.
 348                        * eta: The estimated time in seconds, None if unknown
 349                        * speed: The download speed in bytes/second, None if
 350                                 unknown
 351                        * fragment_index: The counter of the currently
 352                                          downloaded video fragment.
 353                        * fragment_count: The number of fragments (= individual
 354                                          files that will be merged)
 355
 356                        Progress hooks are guaranteed to be called at least once
 357                        (with status "finished") if the download is successful.
 358     postprocessor_hooks:  A list of functions that get called on postprocessing
 359                        progress, with a dictionary with the entries
 360                        * status: One of "started", "processing", or "finished".
 361                                  Check this first and ignore unknown values.
 362                        * postprocessor: Name of the postprocessor
 363                        * info_dict: The extracted info_dict
 364
 365                        Progress hooks are guaranteed to be called at least twice
 366                        (with status "started" and "finished") if the processing is successful.
 367     merge_output_format: Extension to use when merging formats.
 368     final_ext:         Expected final extension; used to detect when the file was
 369                        already downloaded and converted
 370     fixup:             Automatically correct known faults of the file.
 371                        One of:
 372                        - "never": do nothing
 373                        - "warn": only emit a warning
 374                        - "detect_or_warn": check whether we can do anything
 375                                            about it, warn otherwise (default)
 376     source_address:    Client-side IP address to bind to.
 377     sleep_interval_requests: Number of seconds to sleep between requests
 378                        during extraction
 379     sleep_interval:    Number of seconds to sleep before each download when
 380                        used alone or a lower bound of a range for randomized
 381                        sleep before each download (minimum possible number
 382                        of seconds to sleep) when used along with
 383                        max_sleep_interval.
 384     max_sleep_interval:Upper bound of a range for randomized sleep before each
 385                        download (maximum possible number of seconds to sleep).
 386                        Must only be used along with sleep_interval.
 387                        Actual sleep time will be a random float from range
 388                        [sleep_interval; max_sleep_interval].
 389     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 390     listformats:       Print an overview of available video formats and exit.
 391     list_thumbnails:   Print a table of all thumbnails and exit.
 392     match_filter:      A function that gets called for every video with the signature
 393                        (info_dict, *, incomplete: bool) -> Optional[str]
 394                        For backward compatibility with youtube-dl, the signature
 395                        (info_dict) -> Optional[str] is also allowed.
 396                        - If it returns a message, the video is ignored.
 397                        - If it returns None, the video is downloaded.
 398                        - If it returns utils.NO_DEFAULT, the user is interactively
 399                          asked whether to download the video.
 400                        match_filter_func in utils.py is one example for this.
 401     no_color:          Do not emit color codes in output.
 402     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 403                        HTTP header
 404     geo_bypass_country:
 405                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 406                        explicit geographic restriction bypassing via faking
 407                        X-Forwarded-For HTTP header
 408     geo_bypass_ip_block:
 409                        IP range in CIDR notation that will be used similarly to
 410                        geo_bypass_country
 411     external_downloader: A dictionary of protocol keys and the executable of the
 412                        external downloader to use for it. The allowed protocols
 413                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 414                        Set the value to 'native' to use the native downloader
 415     compat_opts:       Compatibility options. See "Differences in default behavior".
 416                        The following options do not work when used through the API:
 417                        filename, abort-on-error, multistreams, no-live-chat, format-sort
 418                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 419                        Refer to __init__.py for their implementation
 420     progress_template: Dictionary of templates for progress outputs.
 421                        Allowed keys are 'download', 'postprocess',
 422                        'download-title' (console title) and 'postprocess-title'.
 423                        The template is mapped on a dictionary with keys 'progress' and 'info'
 424     retry_sleep_functions: Dictionary of functions that take the number of attempts
 425                        as argument and returns the time to sleep in seconds.
 426                        Allowed keys are 'http', 'fragment', 'file_access'
 427     download_ranges:   A function that gets called for every video with the signature
 428                        (info_dict, *, ydl) -> Iterable[Section].
 429                        Only the returned sections will be downloaded. Each Section contains:
 430                        * start_time: Start time of the section in seconds
 431                        * end_time: End time of the section in seconds
 432                        * title: Section title (Optional)
 433                        * index: Section number (Optional)
 434
 435     The following parameters are not used by YoutubeDL itself, they are used by
 436     the downloader (see yt_dlp/downloader/common.py):
 437     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 438     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 439     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 440     external_downloader_args, concurrent_fragment_downloads.
 441
 442     The following options are used by the post processors:
 443     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 444                        to the binary or its containing directory.
 445     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 446                        and a list of additional command-line arguments for the
 447                        postprocessor/executable. The dict can also have "PP+EXE" keys
 448                        which are used when the given exe is used by the given PP.
 449                        Use 'default' as the name for arguments to be passed to all PP
 450                        For compatibility with youtube-dl, a single list of args
 451                        can also be used
 452
 453     The following options are used by the extractors:
 454     extractor_retries: Number of times to retry for known errors
 455     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 456     hls_split_discontinuity: Split HLS playlists to different formats at
 457                        discontinuities such as ad breaks (default: False)
 458     extractor_args:    A dictionary of arguments to be passed to the extractors.
 459                        See "EXTRACTOR ARGUMENTS" for details.
 460                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 461     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 462
 463     The following options are deprecated and may be removed in the future:
 464
 465     playliststart:     - Use playlist_items
 466                        Playlist item to start at.
 467     playlistend:       - Use playlist_items
 468                        Playlist item to end at.
 469     playlistreverse:   - Use playlist_items
 470                        Download playlist items in reverse order.
 471     forceurl:          - Use forceprint
 472                        Force printing final URL.
 473     forcetitle:        - Use forceprint
 474                        Force printing title.
 475     forceid:           - Use forceprint
 476                        Force printing ID.
 477     forcethumbnail:    - Use forceprint
 478                        Force printing thumbnail URL.
 479     forcedescription:  - Use forceprint
 480                        Force printing description.
 481     forcefilename:     - Use forceprint
 482                        Force printing final filename.
 483     forceduration:     - Use forceprint
 484                        Force printing duration.
 485     allsubtitles:      - Use subtitleslangs = ['all']
 486                        Downloads all the subtitles of the video
 487                        (requires writesubtitles or writeautomaticsub)
 488     include_ads:       - Doesn't work
 489                        Download ads as well
 490     call_home:         - Not implemented
 491                        Boolean, true iff we are allowed to contact the
 492                        yt-dlp servers for debugging.
 493     post_hooks:        - Register a custom postprocessor
 494                        A list of functions that get called as the final step
 495                        for each video file, after all postprocessors have been
 496                        called. The filename will be passed as the only argument.
 497     hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
 498                        Use the native HLS downloader instead of ffmpeg/avconv
 499                        if True, otherwise use ffmpeg/avconv if False, otherwise
 500                        use downloader suggested by extractor if None.
 501     prefer_ffmpeg:     - avconv support is deprecated
 502                        If False, use avconv instead of ffmpeg if both are available,
 503                        otherwise prefer ffmpeg.
 504     youtube_include_dash_manifest: - Use extractor_args
 505                        If True (default), DASH manifests and related
 506                        data will be downloaded and processed by extractor.
 507                        You can reduce network I/O by disabling it if you don't
 508                        care about DASH. (only for youtube)
 509     youtube_include_hls_manifest: - Use extractor_args
 510                        If True (default), HLS manifests and related
 511                        data will be downloaded and processed by extractor.
 512                        You can reduce network I/O by disabling it if you don't
 513                        care about HLS. (only for youtube)
 514     """
 515
 516     _NUMERIC_FIELDS = {  # set of field names; NOTE(review): presumably the fields treated as numeric values - consumer is outside this chunk, confirm
 517         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 518         'timestamp', 'release_timestamp',
 519         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 520         'average_rating', 'comment_count', 'age_limit',
 521         'start_time', 'end_time',
 522         'chapter_number', 'season_number', 'episode_number',
 523         'track_number', 'disc_number', 'release_year',
 524     }
 525
 526     _format_fields = {  # set of key names; NOTE(review): presumably the keys that describe a single format entry - confirm against usage
 527         # NB: Keep in sync with the docstring of extractor/common.py
 528         'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
 529         'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
 530         'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
 531         'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
 532         'preference', 'language', 'language_preference', 'quality', 'source_preference',
 533         'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
 534         'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
 535     }
 536     _format_selection_exts = {  # maps a media category to a set of extension strings; NOTE(review): presumably consulted during format selection - confirm
 537         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 538         'video': {'mp4', 'flv', 'webm', '3gp'},
 539         'storyboards': {'mhtml'},
 540     }
 541
 542     def __init__(self, params=None, auto_init=True):
 543         """Create a YoutubeDL object with the given options.
 544         @param auto_init    Whether to load the default extractors and print header (if verbose).
 545                             Set to 'no_verbose_header' to not print the header
 546         """
 547         if params is None:
 548             params = {}
 549         self.params = params
 550         self._ies = {}
 551         self._ies_instances = {}
 552         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 553         self._printed_messages = set()
 554         self._first_webpage_request = True
 555         self._post_hooks = []
 556         self._progress_hooks = []
 557         self._postprocessor_hooks = []
 558         self._download_retcode = 0
 559         self._num_downloads = 0
 560         self._num_videos = 0
 561         self._playlist_level = 0
 562         self._playlist_urls = set()
 563         self.cache = Cache(self)
 564
 565         windows_enable_vt_mode()
 566         stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
 567         self._out_files = Namespace(
 568             out=stdout,
 569             error=sys.stderr,
 570             screen=sys.stderr if self.params.get('quiet') else stdout,
 571             console=None if compat_os_name == 'nt' else next(
 572                 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
 573         )
 574         self._allow_colors = Namespace(**{
 575             type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
 576             for type_, stream in self._out_files.items_ if type_ != 'console'
 577         })
 578
 579         MIN_SUPPORTED, MIN_RECOMMENDED = (3, 6), (3, 7)
 580         current_version = sys.version_info[:2]
 581         if current_version < MIN_RECOMMENDED:
 582             msg = ('Support for Python version %d.%d has been deprecated. '
 583                    'See  https://github.com/yt-dlp/yt-dlp/issues/3764  for more details. '
 584                    'You will recieve only one more update on this version')
 585             if current_version < MIN_SUPPORTED:
 586                 msg = 'Python version %d.%d is no longer supported'
 587             self.deprecation_warning(
 588                 f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))
 589
 590         if self.params.get('allow_unplayable_formats'):
 591             self.report_warning(
 592                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 593                 'This is a developer option intended for debugging. \n'
 594                 '         If you experience any issues while using this option, '
 595                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 596
 597         def check_deprecated(param, option, suggestion):
 598             if self.params.get(param) is not None:
 599                 self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
 600                 return True
 601             return False
 602
 603         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 604             if self.params.get('geo_verification_proxy') is None:
 605                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 606
 607         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 608         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 609         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 610
 611         for msg in self.params.get('_warnings', []):
 612             self.report_warning(msg)
 613         for msg in self.params.get('_deprecation_warnings', []):
 614             self.deprecation_warning(msg)
 615
 616         self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
 617         if not compat_has_legacy:
 618             self.params['compat_opts'].add('no-compat-legacy')
 619         if 'list-formats' in self.params['compat_opts']:
 620             self.params['listformats_table'] = False
 621
 622         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 623             # nooverwrites was unnecessarily changed to overwrites
 624             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 625             # This ensures compatibility with both keys
 626             self.params['overwrites'] = not self.params['nooverwrites']
 627         elif self.params.get('overwrites') is None:
 628             self.params.pop('overwrites', None)
 629         else:
 630             self.params['nooverwrites'] = not self.params['overwrites']
 631
 632         self.params.setdefault('forceprint', {})
 633         self.params.setdefault('print_to_file', {})
 634
 635         # Compatibility with older syntax
 636         if not isinstance(params['forceprint'], dict):
 637             self.params['forceprint'] = {'video': params['forceprint']}
 638
 639         if self.params.get('bidi_workaround', False):
 640             try:
 641                 import pty
 642                 master, slave = pty.openpty()
 643                 width = shutil.get_terminal_size().columns
 644                 width_args = [] if width is None else ['-w', str(width)]
 645                 sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
 646                 try:
 647                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 648                 except OSError:
 649                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 650                 self._output_channel = os.fdopen(master, 'rb')
 651             except OSError as ose:
 652                 if ose.errno == errno.ENOENT:
 653                     self.report_warning(
 654                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 655                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 656                 else:
 657                     raise
 658
 659         if auto_init:
 660             if auto_init != 'no_verbose_header':
 661                 self.print_debug_header()
 662             self.add_default_info_extractors()
 663
 664         if (sys.platform != 'win32'
 665                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 666                 and not self.params.get('restrictfilenames', False)):
 667             # Unicode filesystem API will throw errors (#1474, #13027)
 668             self.report_warning(
 669                 'Assuming --restrict-filenames since file system encoding '
 670                 'cannot encode all characters. '
 671                 'Set the LC_ALL environment variable to fix this.')
 672             self.params['restrictfilenames'] = True
 673
 674         self._parse_outtmpl()
 675
 676         # Creating format selector here allows us to catch syntax errors before the extraction
 677         self.format_selector = (
 678             self.params.get('format') if self.params.get('format') in (None, '-')
 679             else self.params['format'] if callable(self.params['format'])
 680             else self.build_format_selector(self.params['format']))
 681
 682         # Set http_headers defaults according to std_headers
 683         self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
 684
 685         hooks = {
 686             'post_hooks': self.add_post_hook,
 687             'progress_hooks': self.add_progress_hook,
 688             'postprocessor_hooks': self.add_postprocessor_hook,
 689         }
 690         for opt, fn in hooks.items():
 691             for ph in self.params.get(opt, []):
 692                 fn(ph)
 693
 694         for pp_def_raw in self.params.get('postprocessors', []):
 695             pp_def = dict(pp_def_raw)
 696             when = pp_def.pop('when', 'post_process')
 697             self.add_post_processor(
 698                 get_postprocessor(pp_def.pop('key'))(self, **pp_def),
 699                 when=when)
 700
 701         self._setup_opener()
 702         register_socks_protocols()
 703
 704         def preload_download_archive(fn):
 705             """Preload the archive, if any is specified"""
 706             if fn is None:
 707                 return False
 708             self.write_debug(f'Loading archive file {fn!r}')
 709             try:
 710                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 711                     for line in archive_file:
 712                         self.archive.add(line.strip())
 713             except OSError as ioe:
 714                 if ioe.errno != errno.ENOENT:
 715                     raise
 716                 return False
 717             return True
 718
 719         self.archive = set()
 720         preload_download_archive(self.params.get('download_archive'))
 721
 722     def warn_if_short_id(self, argv):
 723         # short YouTube ID starting with dash?
 724         idxs = [
 725             i for i, a in enumerate(argv)
 726             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 727         if idxs:
 728             correct_argv = (
 729                 ['yt-dlp']
 730                 + [a for i, a in enumerate(argv) if i not in idxs]
 731                 + ['--'] + [argv[i] for i in idxs]
 732             )
 733             self.report_warning(
 734                 'Long argument string detected. '
 735                 'Use -- to separate parameters and URLs, like this:\n%s' %
 736                 args_to_str(correct_argv))
 737
 738     def add_info_extractor(self, ie):
 739         """Add an InfoExtractor object to the end of the list."""
 740         ie_key = ie.ie_key()
 741         self._ies[ie_key] = ie
 742         if not isinstance(ie, type):
 743             self._ies_instances[ie_key] = ie
 744             ie.set_downloader(self)
 745
 746     def _get_info_extractor_class(self, ie_key):
 747         ie = self._ies.get(ie_key)
 748         if ie is None:
 749             ie = get_info_extractor(ie_key)
 750             self.add_info_extractor(ie)
 751         return ie
 752
 753     def get_info_extractor(self, ie_key):
 754         """
 755         Get an instance of an IE with name ie_key, it will try to get one from
 756         the _ies list, if there's no instance it will create a new one and add
 757         it to the extractor list.
 758         """
 759         ie = self._ies_instances.get(ie_key)
 760         if ie is None:
 761             ie = get_info_extractor(ie_key)()
 762             self.add_info_extractor(ie)
 763         return ie
 764
 765     def add_default_info_extractors(self):
 766         """
 767         Add the InfoExtractors returned by gen_extractors to the end of the list
 768         """
 769         for ie in gen_extractor_classes():
 770             self.add_info_extractor(ie)
 771
 772     def add_post_processor(self, pp, when='post_process'):
 773         """Add a PostProcessor object to the end of the chain."""
 774         assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
 775         self._pps[when].append(pp)
 776         pp.set_downloader(self)
 777
 778     def add_post_hook(self, ph):
 779         """Add the post hook"""
 780         self._post_hooks.append(ph)
 781
 782     def add_progress_hook(self, ph):
 783         """Add the download progress hook"""
 784         self._progress_hooks.append(ph)
 785
 786     def add_postprocessor_hook(self, ph):
 787         """Add the postprocessing progress hook"""
 788         self._postprocessor_hooks.append(ph)
 789         for pps in self._pps.values():
 790             for pp in pps:
 791                 pp.add_progress_hook(ph)
 792
 793     def _bidi_workaround(self, message):
 794         if not hasattr(self, '_output_channel'):
 795             return message
 796
 797         assert hasattr(self, '_output_process')
 798         assert isinstance(message, str)
 799         line_count = message.count('\n') + 1
 800         self._output_process.stdin.write((message + '\n').encode())
 801         self._output_process.stdin.flush()
 802         res = ''.join(self._output_channel.readline().decode()
 803                       for _ in range(line_count))
 804         return res[:-len('\n')]
 805
 806     def _write_string(self, message, out=None, only_once=False):
 807         if only_once:
 808             if message in self._printed_messages:
 809                 return
 810             self._printed_messages.add(message)
 811         write_string(message, out=out, encoding=self.params.get('encoding'))
 812
 813     def to_stdout(self, message, skip_eol=False, quiet=None):
 814         """Print message to stdout"""
 815         if quiet is not None:
 816             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
 817         if skip_eol is not False:
 818             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
 819         self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
 820
 821     def to_screen(self, message, skip_eol=False, quiet=None):
 822         """Print message to screen if not in quiet mode"""
 823         if self.params.get('logger'):
 824             self.params['logger'].debug(message)
 825             return
 826         if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
 827             return
 828         self._write_string(
 829             '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 830             self._out_files.screen)
 831
 832     def to_stderr(self, message, only_once=False):
 833         """Print message to stderr"""
 834         assert isinstance(message, str)
 835         if self.params.get('logger'):
 836             self.params['logger'].error(message)
 837         else:
 838             self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
 839
 840     def _send_console_code(self, code):
 841         if compat_os_name == 'nt' or not self._out_files.console:
 842             return
 843         self._write_string(code, self._out_files.console)
 844
 845     def to_console_title(self, message):
 846         if not self.params.get('consoletitle', False):
 847             return
 848         message = remove_terminal_sequences(message)
 849         if compat_os_name == 'nt':
 850             if ctypes.windll.kernel32.GetConsoleWindow():
 851                 # c_wchar_p() might not be necessary if `message` is
 852                 # already of type unicode()
 853                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 854         else:
 855             self._send_console_code(f'\033]0;{message}\007')
 856
 857     def save_console_title(self):
 858         if not self.params.get('consoletitle') or self.params.get('simulate'):
 859             return
 860         self._send_console_code('\033[22;0t')  # Save the title on stack
 861
 862     def restore_console_title(self):
 863         if not self.params.get('consoletitle') or self.params.get('simulate'):
 864             return
 865         self._send_console_code('\033[23;0t')  # Restore the title from stack
 866
    def __enter__(self):
        """Enter the context: save the console title and return this object"""
        self.save_console_title()
        return self
 870
 871     def __exit__(self, *args):
 872         self.restore_console_title()
 873
 874         if self.params.get('cookiefile') is not None:
 875             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 876
 877     def trouble(self, message=None, tb=None, is_error=True):
 878         """Determine action to take when a download problem appears.
 879
 880         Depending on if the downloader has been configured to ignore
 881         download errors or not, this method may throw an exception or
 882         not when errors are found, after printing the message.
 883
 884         @param tb          If given, is additional traceback information
 885         @param is_error    Whether to raise error according to ignorerrors
 886         """
 887         if message is not None:
 888             self.to_stderr(message)
 889         if self.params.get('verbose'):
 890             if tb is None:
 891                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 892                     tb = ''
 893                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 894                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 895                     tb += encode_compat_str(traceback.format_exc())
 896                 else:
 897                     tb_data = traceback.format_list(traceback.extract_stack())
 898                     tb = ''.join(tb_data)
 899             if tb:
 900                 self.to_stderr(tb)
 901         if not is_error:
 902             return
 903         if not self.params.get('ignoreerrors'):
 904             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 905                 exc_info = sys.exc_info()[1].exc_info
 906             else:
 907                 exc_info = sys.exc_info()
 908             raise DownloadError(message, exc_info)
 909         self._download_retcode = 1
 910
    # Named text styles. The values are what this class passes as the format
    # argument of the _format_out/_format_screen/_format_err helpers
    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        WARNING='yellow',
        SUPPRESS='light black',
    )
 921
 922     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 923         text = str(text)
 924         if test_encoding:
 925             original_text = text
 926             # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
 927             encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
 928             text = text.encode(encoding, 'ignore').decode(encoding)
 929             if fallback is not None and text != original_text:
 930                 text = fallback
 931         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 932
 933     def _format_out(self, *args, **kwargs):
 934         return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
 935
 936     def _format_screen(self, *args, **kwargs):
 937         return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
 938
 939     def _format_err(self, *args, **kwargs):
 940         return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
 941
 942     def report_warning(self, message, only_once=False):
 943         '''
 944         Print the message to stderr, it will be prefixed with 'WARNING:'
 945         If stderr is a tty file the 'WARNING:' will be colored
 946         '''
 947         if self.params.get('logger') is not None:
 948             self.params['logger'].warning(message)
 949         else:
 950             if self.params.get('no_warnings'):
 951                 return
 952             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 953
 954     def deprecation_warning(self, message):
 955         if self.params.get('logger') is not None:
 956             self.params['logger'].warning(f'DeprecationWarning: {message}')
 957         else:
 958             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 959
 960     def report_error(self, message, *args, **kwargs):
 961         '''
 962         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 963         in red if stderr is a tty file.
 964         '''
 965         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 966
 967     def write_debug(self, message, only_once=False):
 968         '''Log debug message or Print message to stderr'''
 969         if not self.params.get('verbose', False):
 970             return
 971         message = f'[debug] {message}'
 972         if self.params.get('logger'):
 973             self.params['logger'].debug(message)
 974         else:
 975             self.to_stderr(message, only_once)
 976
 977     def report_file_already_downloaded(self, file_name):
 978         """Report file has already been fully downloaded."""
 979         try:
 980             self.to_screen('[download] %s has already been downloaded' % file_name)
 981         except UnicodeEncodeError:
 982             self.to_screen('[download] The file has already been downloaded')
 983
 984     def report_file_delete(self, file_name):
 985         """Report that existing file will be deleted."""
 986         try:
 987             self.to_screen('Deleting existing file %s' % file_name)
 988         except UnicodeEncodeError:
 989             self.to_screen('Deleting existing file')
 990
 991     def raise_no_formats(self, info, forced=False, *, msg=None):
 992         has_drm = info.get('_has_drm')
 993         ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
 994         msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
 995         if forced or not ignored:
 996             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 997                                  expected=has_drm or ignored or expected)
 998         else:
 999             self.report_warning(msg)
1000
1001     def parse_outtmpl(self):
1002         self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
1003         self._parse_outtmpl()
1004         return self.params['outtmpl']
1005
1006     def _parse_outtmpl(self):
1007         sanitize = IDENTITY
1008         if self.params.get('restrictfilenames'):  # Remove spaces in the default template
1009             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
1010
1011         outtmpl = self.params.setdefault('outtmpl', {})
1012         if not isinstance(outtmpl, dict):
1013             self.params['outtmpl'] = outtmpl = {'default': outtmpl}
1014         outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})
1015
1016     def get_output_path(self, dir_type='', filename=None):
1017         paths = self.params.get('paths', {})
1018         assert isinstance(paths, dict)
1019         path = os.path.join(
1020             expand_path(paths.get('home', '').strip()),
1021             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1022             filename or '')
1023         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1024
1025     @staticmethod
1026     def _outtmpl_expandpath(outtmpl):
1027         # expand_path translates '%%' into '%' and '$$' into '$'
1028         # correspondingly that is not what we want since we need to keep
1029         # '%%' intact for template dict substitution step. Working around
1030         # with boundary-alike separator hack.
1031         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1032         outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
1033
1034         # outtmpl should be expand_path'ed before template dict substitution
1035         # because meta fields may contain env variables we don't want to
1036         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1037         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1038         return expand_path(outtmpl).replace(sep, '')
1039
1040     @staticmethod
1041     def escape_outtmpl(outtmpl):
1042         ''' Escape any remaining strings like %s, %abc% etc. '''
1043         return re.sub(
1044             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1045             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1046             outtmpl)
1047
1048     @classmethod
1049     def validate_outtmpl(cls, outtmpl):
1050         ''' @return None or Exception object '''
1051         outtmpl = re.sub(
1052             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
1053             lambda mobj: f'{mobj.group(0)[:-1]}s',
1054             cls._outtmpl_expandpath(outtmpl))
1055         try:
1056             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1057             return None
1058         except ValueError as err:
1059             return err
1060
1061     @staticmethod
1062     def _copy_infodict(info_dict):
1063         info_dict = dict(info_dict)
1064         info_dict.pop('__postprocessors', None)
1065         info_dict.pop('__pending_error', None)
1066         return info_dict
1067
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        @returns           A tuple (outtmpl, TMPL_DICT): the template in which every field is
                           replaced by a generated key, and the dict that maps each
                           generated key to the value computed for it

        NB: "epoch" is set on the info_dict that is passed in (if it is missing).
            All the other changes are made on a copy
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        # Fields that are computed here rather than taken from the info_dict
        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Maps the generated keys to the computed values. Filled by create_key
        TMPL_DICT = {}
        # Matches the %-conversions of the template, including the custom types ljhqBUDS
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Matches what is inside the parentheses of a conversion:
        # [-]fields[maths][>strf_format][,alternate][&replacement][|default]
        INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')

        def _traverse_infodict(k):
            """Look up the dot-separated path k in the info_dict. A leading dot is ignored"""
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            """Compute the value of one field expression, given the groupdict of its match"""
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                # The string is consumed from the left, alternating between an operator
                # and its operand. operator is None while an operator is expected next
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is either a number or the name of another field
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        # E.g. when the value or the operand is None
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder for the fields that have no value (unless a default is given)
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            """Sanitize value as a filename. With the "filename-sanitization" compat
            option, is_id is decided from the name of the key"""
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        # A callable passed as sanitize is used as the sanitizer itself;
        # from here on, sanitize is only a flag
        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            """json.dumps fallback: sets and LazyLists become lists, the rest their repr"""
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            """Replacement function for EXTERNAL_FORMAT_RE: compute the value of one
            conversion, store it in TMPL_DICT and return the rewritten conversion"""
            # Conversions without a key are left as they are
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            # Try the field and then each of its alternates until one gives a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = f'0{field_size_compat_map[key]:d}d'

            # The replacement is used only when the field has a value
            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            # Same format, but with the conversion type changed to "s"
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'h':  # html
                value, fmt = escapeHTML(value), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                # Only the first character is used. A falsy value is formatted as a string
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            # The generated key is made of the original key (with a NUL after each "%"),
            # a NUL and the original format
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1246
1247     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1248         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1249         return self.escape_outtmpl(outtmpl) % info_dict
1250
1251     def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1252         assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1253         if outtmpl is None:
1254             outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1255         try:
1256             outtmpl = self._outtmpl_expandpath(outtmpl)
1257             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1258             if not filename:
1259                 return None
1260
1261             if tmpl_type in ('', 'temp'):
1262                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1263                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1264                     filename = replace_extension(filename, ext, final_ext)
1265             elif tmpl_type:
1266                 force_ext = OUTTMPL_TYPES[tmpl_type]
1267                 if force_ext:
1268                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1269
1270             # https://github.com/blackjack4494/youtube-dlc/issues/85
1271             trim_file_name = self.params.get('trim_file_name', False)
1272             if trim_file_name:
1273                 no_ext, *ext = filename.rsplit('.', 2)
1274                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1275
1276             return filename
1277         except ValueError as err:
1278             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1279             return None
1280
1281     def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1282         """Generate the output filename"""
1283         if outtmpl:
1284             assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1285             dir_type = None
1286         filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1287         if not filename and dir_type not in ('', 'temp'):
1288             return ''
1289
1290         if warn:
1291             if not self.params.get('paths'):
1292                 pass
1293             elif filename == '-':
1294                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1295             elif os.path.isabs(filename):
1296                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1297         if filename == '-' or not filename:
1298             return filename
1299
1300         return self.get_output_path(dir_type, filename)
1301
1302     def _match_entry(self, info_dict, incomplete=False, silent=False):
1303         """ Returns None if the file should be downloaded """
1304
1305         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1306
1307         def check_filter():
1308             if 'title' in info_dict:
1309                 # This can happen when we're just evaluating the playlist
1310                 title = info_dict['title']
1311                 matchtitle = self.params.get('matchtitle', False)
1312                 if matchtitle:
1313                     if not re.search(matchtitle, title, re.IGNORECASE):
1314                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1315                 rejecttitle = self.params.get('rejecttitle', False)
1316                 if rejecttitle:
1317                     if re.search(rejecttitle, title, re.IGNORECASE):
1318                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1319             date = info_dict.get('upload_date')
1320             if date is not None:
1321                 dateRange = self.params.get('daterange', DateRange())
1322                 if date not in dateRange:
1323                     return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1324             view_count = info_dict.get('view_count')
1325             if view_count is not None:
1326                 min_views = self.params.get('min_views')
1327                 if min_views is not None and view_count < min_views:
1328                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1329                 max_views = self.params.get('max_views')
1330                 if max_views is not None and view_count > max_views:
1331                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1332             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1333                 return 'Skipping "%s" because it is age restricted' % video_title
1334
1335             match_filter = self.params.get('match_filter')
1336             if match_filter is not None:
1337                 try:
1338                     ret = match_filter(info_dict, incomplete=incomplete)
1339                 except TypeError:
1340                     # For backward compatibility
1341                     ret = None if incomplete else match_filter(info_dict)
1342                 if ret is NO_DEFAULT:
1343                     while True:
1344                         filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1345                         reply = input(self._format_screen(
1346                             f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1347                         if reply in {'y', ''}:
1348                             return None
1349                         elif reply == 'n':
1350                             return f'Skipping {video_title}'
1351                 elif ret is not None:
1352                     return ret
1353             return None
1354
1355         if self.in_download_archive(info_dict):
1356             reason = '%s has already been recorded in the archive' % video_title
1357             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1358         else:
1359             reason = check_filter()
1360             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1361         if reason is not None:
1362             if not silent:
1363                 self.to_screen('[download] ' + reason)
1364             if self.params.get(break_opt, False):
1365                 raise break_err()
1366         return reason
1367
1368     @staticmethod
1369     def add_extra_info(info_dict, extra_info):
1370         '''Set the keys from extra_info in info dict if they are missing'''
1371         for key, value in extra_info.items():
1372             info_dict.setdefault(key, value)
1373
1374     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1375                      process=True, force_generic_extractor=False):
1376         """
1377         Return a list with a dictionary for each video extracted.
1378
1379         Arguments:
1380         url -- URL to extract
1381
1382         Keyword arguments:
1383         download -- whether to download videos during extraction
1384         ie_key -- extractor key hint
1385         extra_info -- dictionary containing the extra values to add to each result
1386         process -- whether to resolve all unresolved references (URLs, playlist items),
1387             must be True for download to work.
1388         force_generic_extractor -- force using the generic extractor
1389         """
1390
1391         if extra_info is None:
1392             extra_info = {}
1393
1394         if not ie_key and force_generic_extractor:
1395             ie_key = 'Generic'
1396
1397         if ie_key:
1398             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1399         else:
1400             ies = self._ies
1401
1402         for ie_key, ie in ies.items():
1403             if not ie.suitable(url):
1404                 continue
1405
1406             if not ie.working():
1407                 self.report_warning('The program functionality for this site has been marked as broken, '
1408                                     'and will probably not work.')
1409
1410             temp_id = ie.get_temp_id(url)
1411             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1412                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1413                 if self.params.get('break_on_existing', False):
1414                     raise ExistingVideoReached()
1415                 break
1416             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1417         else:
1418             self.report_error('no suitable InfoExtractor for URL %s' % url)
1419
1420     def _handle_extraction_exceptions(func):
1421         @functools.wraps(func)
1422         def wrapper(self, *args, **kwargs):
1423             while True:
1424                 try:
1425                     return func(self, *args, **kwargs)
1426                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1427                     raise
1428                 except ReExtractInfo as e:
1429                     if e.expected:
1430                         self.to_screen(f'{e}; Re-extracting data')
1431                     else:
1432                         self.to_stderr('\r')
1433                         self.report_warning(f'{e}; Re-extracting data')
1434                     continue
1435                 except GeoRestrictedError as e:
1436                     msg = e.msg
1437                     if e.countries:
1438                         msg += '\nThis video is available in %s.' % ', '.join(
1439                             map(ISO3166Utils.short2full, e.countries))
1440                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1441                     self.report_error(msg)
1442                 except ExtractorError as e:  # An error we somewhat expected
1443                     self.report_error(str(e), e.format_traceback())
1444                 except Exception as e:
1445                     if self.params.get('ignoreerrors'):
1446                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1447                     else:
1448                         raise
1449                 break
1450         return wrapper
1451
1452     def _wait_for_video(self, ie_result):
1453         if (not self.params.get('wait_for_video')
1454                 or ie_result.get('_type', 'video') != 'video'
1455                 or ie_result.get('formats') or ie_result.get('url')):
1456             return
1457
1458         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1459         last_msg = ''
1460
1461         def progress(msg):
1462             nonlocal last_msg
1463             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1464             last_msg = msg
1465
1466         min_wait, max_wait = self.params.get('wait_for_video')
1467         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1468         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1469             diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1470             self.report_warning('Release time of video is not known')
1471         elif (diff or 0) <= 0:
1472             self.report_warning('Video should already be available according to extracted info')
1473         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1474         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1475
1476         wait_till = time.time() + diff
1477         try:
1478             while True:
1479                 diff = wait_till - time.time()
1480                 if diff <= 0:
1481                     progress('')
1482                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1483                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1484                 time.sleep(1)
1485         except KeyboardInterrupt:
1486             progress('')
1487             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1488         except BaseException as e:
1489             if not isinstance(e, ReExtractInfo):
1490                 self.to_screen('')
1491             raise
1492
1493     @_handle_extraction_exceptions
1494     def __extract_info(self, url, ie, download, extra_info, process):
1495         ie_result = ie.extract(url)
1496         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1497             return
1498         if isinstance(ie_result, list):
1499             # Backwards compatibility: old IE result format
1500             ie_result = {
1501                 '_type': 'compat_list',
1502                 'entries': ie_result,
1503             }
1504         if extra_info.get('original_url'):
1505             ie_result.setdefault('original_url', extra_info['original_url'])
1506         self.add_default_extra_info(ie_result, ie, url)
1507         if process:
1508             self._wait_for_video(ie_result)
1509             return self.process_ie_result(ie_result, download, extra_info)
1510         else:
1511             return ie_result
1512
1513     def add_default_extra_info(self, ie_result, ie, url):
1514         if url is not None:
1515             self.add_extra_info(ie_result, {
1516                 'webpage_url': url,
1517                 'original_url': url,
1518             })
1519         webpage_url = ie_result.get('webpage_url')
1520         if webpage_url:
1521             self.add_extra_info(ie_result, {
1522                 'webpage_url_basename': url_basename(webpage_url),
1523                 'webpage_url_domain': get_domain(webpage_url),
1524             })
1525         if ie is not None:
1526             self.add_extra_info(ie_result, {
1527                 'extractor': ie.IE_NAME,
1528                 'extractor_key': ie.ie_key(),
1529             })
1530
1531     def process_ie_result(self, ie_result, download=True, extra_info=None):
1532         """
1533         Take the result of the ie(may be modified) and resolve all unresolved
1534         references (URLs, playlist items).
1535
1536         It will also download the videos if 'download'.
1537         Returns the resolved ie_result.
1538         """
1539         if extra_info is None:
1540             extra_info = {}
1541         result_type = ie_result.get('_type', 'video')
1542
1543         if result_type in ('url', 'url_transparent'):
1544             ie_result['url'] = sanitize_url(ie_result['url'])
1545             if ie_result.get('original_url'):
1546                 extra_info.setdefault('original_url', ie_result['original_url'])
1547
1548             extract_flat = self.params.get('extract_flat', False)
1549             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1550                     or extract_flat is True):
1551                 info_copy = ie_result.copy()
1552                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1553                 if ie and not ie_result.get('id'):
1554                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1555                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1556                 self.add_extra_info(info_copy, extra_info)
1557                 info_copy, _ = self.pre_process(info_copy)
1558                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1559                 self._raise_pending_errors(info_copy)
1560                 if self.params.get('force_write_download_archive', False):
1561                     self.record_download_archive(info_copy)
1562                 return ie_result
1563
1564         if result_type == 'video':
1565             self.add_extra_info(ie_result, extra_info)
1566             ie_result = self.process_video_result(ie_result, download=download)
1567             self._raise_pending_errors(ie_result)
1568             additional_urls = (ie_result or {}).get('additional_urls')
1569             if additional_urls:
1570                 # TODO: Improve MetadataParserPP to allow setting a list
1571                 if isinstance(additional_urls, str):
1572                     additional_urls = [additional_urls]
1573                 self.to_screen(
1574                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1575                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1576                 ie_result['additional_entries'] = [
1577                     self.extract_info(
1578                         url, download, extra_info=extra_info,
1579                         force_generic_extractor=self.params.get('force_generic_extractor'))
1580                     for url in additional_urls
1581                 ]
1582             return ie_result
1583         elif result_type == 'url':
1584             # We have to add extra_info to the results because it may be
1585             # contained in a playlist
1586             return self.extract_info(
1587                 ie_result['url'], download,
1588                 ie_key=ie_result.get('ie_key'),
1589                 extra_info=extra_info)
1590         elif result_type == 'url_transparent':
1591             # Use the information from the embedding page
1592             info = self.extract_info(
1593                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1594                 extra_info=extra_info, download=False, process=False)
1595
1596             # extract_info may return None when ignoreerrors is enabled and
1597             # extraction failed with an error, don't crash and return early
1598             # in this case
1599             if not info:
1600                 return info
1601
1602             exempted_fields = {'_type', 'url', 'ie_key'}
1603             if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1604                 # For video clips, the id etc of the clip extractor should be used
1605                 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1606
1607             new_result = info.copy()
1608             new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1609
1610             # Extracted info may not be a video result (i.e.
1611             # info.get('_type', 'video') != video) but rather an url or
1612             # url_transparent. In such cases outer metadata (from ie_result)
1613             # should be propagated to inner one (info). For this to happen
1614             # _type of info should be overridden with url_transparent. This
1615             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1616             if new_result.get('_type') == 'url':
1617                 new_result['_type'] = 'url_transparent'
1618
1619             return self.process_ie_result(
1620                 new_result, download=download, extra_info=extra_info)
1621         elif result_type in ('playlist', 'multi_video'):
1622             # Protect from infinite recursion due to recursively nested playlists
1623             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1624             webpage_url = ie_result['webpage_url']
1625             if webpage_url in self._playlist_urls:
1626                 self.to_screen(
1627                     '[download] Skipping already downloaded playlist: %s'
1628                     % ie_result.get('title') or ie_result.get('id'))
1629                 return
1630
1631             self._playlist_level += 1
1632             self._playlist_urls.add(webpage_url)
1633             self._fill_common_fields(ie_result, False)
1634             self._sanitize_thumbnails(ie_result)
1635             try:
1636                 return self.__process_playlist(ie_result, download)
1637             finally:
1638                 self._playlist_level -= 1
1639                 if not self._playlist_level:
1640                     self._playlist_urls.clear()
1641         elif result_type == 'compat_list':
1642             self.report_warning(
1643                 'Extractor %s returned a compat_list result. '
1644                 'It needs to be updated.' % ie_result.get('extractor'))
1645
1646             def _fixup(r):
1647                 self.add_extra_info(r, {
1648                     'extractor': ie_result['extractor'],
1649                     'webpage_url': ie_result['webpage_url'],
1650                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1651                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1652                     'extractor_key': ie_result['extractor_key'],
1653                 })
1654                 return r
1655             ie_result['entries'] = [
1656                 self.process_ie_result(_fixup(r), download, extra_info)
1657                 for r in ie_result['entries']
1658             ]
1659             return ie_result
1660         else:
1661             raise Exception('Invalid result type: %s' % result_type)
1662
1663     def _ensure_dir_exists(self, path):
1664         return make_dir(path, self.report_error)
1665
1666     @staticmethod
1667     def _playlist_infodict(ie_result, **kwargs):
1668         return {
1669             **ie_result,
1670             'playlist': ie_result.get('title') or ie_result.get('id'),
1671             'playlist_id': ie_result.get('id'),
1672             'playlist_title': ie_result.get('title'),
1673             'playlist_uploader': ie_result.get('uploader'),
1674             'playlist_uploader_id': ie_result.get('uploader_id'),
1675             'playlist_index': 0,
1676             **kwargs,
1677         }
1678
    def __process_playlist(self, ie_result, download):
        """Process each entry in the playlist

        Resolves the requested entries of the playlist ie_result, optionally
        writes the playlist's own files (infojson, description, thumbnails),
        processes every entry and finally runs the 'playlist' postprocessors.

        Returns the updated ie_result, or None if writing one of the
        playlist files returned None.
        """
        title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
        self.to_screen(f'[download] Downloading playlist: {title}')

        all_entries = PlaylistEntries(self, ie_result)
        # Iterated below as (playlist_index, entry) pairs
        entries = orderedSet(all_entries.get_requested_items(), lazy=True)

        lazy = self.params.get('lazy_playlist')
        if lazy:
            # Entries are consumed one by one in the loop below,
            # so their number is not known beforehand
            resolved_entries, n_entries = [], 'N/A'
            ie_result['requested_entries'], ie_result['entries'] = None, None
        else:
            # NB: `entries` and `resolved_entries` are the same list object from here on
            entries = resolved_entries = list(entries)
            n_entries = len(resolved_entries)
            # Split the pairs into a tuple of indices and a tuple of entries
            ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
        if not ie_result.get('playlist_count'):
            # Better to do this after potentially exhausting entries
            ie_result['playlist_count'] = all_entries.get_full_count()

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            # `ie_copy` is only defined in this branch. It is used again after the
            # loop, but only when `_infojson_written` is True, which implies this branch ran
            ie_copy = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            # NOTE(review): None presumably signals a failed write - confirm in _write_info_json
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if lazy:
            if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
                self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
        elif self.params.get('playlistreverse'):
            # In-place; also reorders `resolved_entries` (same list), but not
            # the ie_result['requested_entries'] tuple computed above
            entries.reverse()
        elif self.params.get('playlistrandom'):
            random.shuffle(entries)

        self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
                       f'{format_field(ie_result, "playlist_count", " of %s")}')

        failures = 0
        # Without the param (or when it is 0) the limit can never be reached
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, (playlist_index, entry) in enumerate(entries):
            if lazy:
                resolved_entries.append((playlist_index, entry))

            # TODO: Add auto-generated fields
            # Skipped entries stay unprocessed in `resolved_entries`
            if not entry or self._match_entry(entry, incomplete=True) is not None:
                continue

            self.to_screen('[download] Downloading video %s of %s' % (
                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

            entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
            if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
                # Compat behavior: take the index by position in the tuple
                # that was computed before any reverse/shuffle
                playlist_index = ie_result['requested_entries'][i]

            # The dict is the extra_info that gets added to the entry's result
            entry_result = self.__process_iterable_entry(entry, download, {
                'n_entries': int_or_none(n_entries),
                # 'requested_entries' is None in lazy mode, giving 0 here
                '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
                'playlist_count': ie_result.get('playlist_count'),
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
                'playlist': title,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            # A falsy result counts as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
                break
            # Replace the raw entry with its processed result. In non-lazy mode this
            # writes to the list being enumerated, at the position already consumed
            resolved_entries[i] = (playlist_index, entry_result)

        # Update with processed data
        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])

        # Write the updated info to json
        if _infojson_written is True and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {title}')
        return ie_result
1780
1781     @_handle_extraction_exceptions
1782     def __process_iterable_entry(self, entry, download, extra_info):
1783         return self.process_ie_result(
1784             entry, download=download, extra_info=extra_info)
1785
1786     def _build_format_filter(self, filter_spec):
1787         " Returns a function to filter the formats according to the filter_spec "
1788
1789         OPERATORS = {
1790             '<': operator.lt,
1791             '<=': operator.le,
1792             '>': operator.gt,
1793             '>=': operator.ge,
1794             '=': operator.eq,
1795             '!=': operator.ne,
1796         }
1797         operator_rex = re.compile(r'''(?x)\s*
1798             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1799             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1800             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1801             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1802         m = operator_rex.fullmatch(filter_spec)
1803         if m:
1804             try:
1805                 comparison_value = int(m.group('value'))
1806             except ValueError:
1807                 comparison_value = parse_filesize(m.group('value'))
1808                 if comparison_value is None:
1809                     comparison_value = parse_filesize(m.group('value') + 'B')
1810                 if comparison_value is None:
1811                     raise ValueError(
1812                         'Invalid value %r in format specification %r' % (
1813                             m.group('value'), filter_spec))
1814             op = OPERATORS[m.group('op')]
1815
1816         if not m:
1817             STR_OPERATORS = {
1818                 '=': operator.eq,
1819                 '^=': lambda attr, value: attr.startswith(value),
1820                 '$=': lambda attr, value: attr.endswith(value),
1821                 '*=': lambda attr, value: value in attr,
1822                 '~=': lambda attr, value: value.search(attr) is not None
1823             }
1824             str_operator_rex = re.compile(r'''(?x)\s*
1825                 (?P<key>[a-zA-Z0-9._-]+)\s*
1826                 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1827                 (?P<quote>["'])?
1828                 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1829                 (?(quote)(?P=quote))\s*
1830                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1831             m = str_operator_rex.fullmatch(filter_spec)
1832             if m:
1833                 if m.group('op') == '~=':
1834                     comparison_value = re.compile(m.group('value'))
1835                 else:
1836                     comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1837                 str_op = STR_OPERATORS[m.group('op')]
1838                 if m.group('negation'):
1839                     op = lambda attr, value: not str_op(attr, value)
1840                 else:
1841                     op = str_op
1842
1843         if not m:
1844             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1845
1846         def _filter(f):
1847             actual_value = f.get(m.group('key'))
1848             if actual_value is None:
1849                 return m.group('none_inclusive')
1850             return op(actual_value, comparison_value)
1851         return _filter
1852
1853     def _check_formats(self, formats):
1854         for f in formats:
1855             self.to_screen('[info] Testing format %s' % f['format_id'])
1856             path = self.get_output_path('temp')
1857             if not self._ensure_dir_exists(f'{path}/'):
1858                 continue
1859             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1860             temp_file.close()
1861             try:
1862                 success, _ = self.dl(temp_file.name, f, test=True)
1863             except (DownloadError, OSError, ValueError) + network_exceptions:
1864                 success = False
1865             finally:
1866                 if os.path.exists(temp_file.name):
1867                     try:
1868                         os.remove(temp_file.name)
1869                     except OSError:
1870                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1871             if success:
1872                 yield f
1873             else:
1874                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1875
1876     def _default_format_spec(self, info_dict, download=True):
1877
1878         def can_merge():
1879             merger = FFmpegMergerPP(self)
1880             return merger.available and merger.can_merge()
1881
1882         prefer_best = (
1883             not self.params.get('simulate')
1884             and download
1885             and (
1886                 not can_merge()
1887                 or info_dict.get('is_live') and not self.params.get('live_from_start')
1888                 or self.params['outtmpl']['default'] == '-'))
1889         compat = (
1890             prefer_best
1891             or self.params.get('allow_multiple_audio_streams', False)
1892             or 'format-spec' in self.params['compat_opts'])
1893
1894         return (
1895             'best/bestvideo+bestaudio' if prefer_best
1896             else 'bestvideo*+bestaudio/best' if not compat
1897             else 'bestvideo+bestaudio/best')
1898
1899     def build_format_selector(self, format_spec):
1900         def syntax_error(note, start):
1901             message = (
1902                 'Invalid format specification: '
1903                 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
1904             return SyntaxError(message)
1905
1906         PICKFIRST = 'PICKFIRST'
1907         MERGE = 'MERGE'
1908         SINGLE = 'SINGLE'
1909         GROUP = 'GROUP'
1910         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1911
1912         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1913                                   'video': self.params.get('allow_multiple_video_streams', False)}
1914
1915         check_formats = self.params.get('check_formats') == 'selected'
1916
1917         def _parse_filter(tokens):
1918             filter_parts = []
1919             for type, string, start, _, _ in tokens:
1920                 if type == tokenize.OP and string == ']':
1921                     return ''.join(filter_parts)
1922                 else:
1923                     filter_parts.append(string)
1924
1925         def _remove_unused_ops(tokens):
1926             # Remove operators that we don't use and join them with the surrounding strings
1927             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1928             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1929             last_string, last_start, last_end, last_line = None, None, None, None
1930             for type, string, start, end, line in tokens:
1931                 if type == tokenize.OP and string == '[':
1932                     if last_string:
1933                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1934                         last_string = None
1935                     yield type, string, start, end, line
1936                     # everything inside brackets will be handled by _parse_filter
1937                     for type, string, start, end, line in tokens:
1938                         yield type, string, start, end, line
1939                         if type == tokenize.OP and string == ']':
1940                             break
1941                 elif type == tokenize.OP and string in ALLOWED_OPS:
1942                     if last_string:
1943                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1944                         last_string = None
1945                     yield type, string, start, end, line
1946                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1947                     if not last_string:
1948                         last_string = string
1949                         last_start = start
1950                         last_end = end
1951                     else:
1952                         last_string += string
1953             if last_string:
1954                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1955
1956         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1957             selectors = []
1958             current_selector = None
1959             for type, string, start, _, _ in tokens:
1960                 # ENCODING is only defined in python 3.x
1961                 if type == getattr(tokenize, 'ENCODING', None):
1962                     continue
1963                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1964                     current_selector = FormatSelector(SINGLE, string, [])
1965                 elif type == tokenize.OP:
1966                     if string == ')':
1967                         if not inside_group:
1968                             # ')' will be handled by the parentheses group
1969                             tokens.restore_last_token()
1970                         break
1971                     elif inside_merge and string in ['/', ',']:
1972                         tokens.restore_last_token()
1973                         break
1974                     elif inside_choice and string == ',':
1975                         tokens.restore_last_token()
1976                         break
1977                     elif string == ',':
1978                         if not current_selector:
1979                             raise syntax_error('"," must follow a format selector', start)
1980                         selectors.append(current_selector)
1981                         current_selector = None
1982                     elif string == '/':
1983                         if not current_selector:
1984                             raise syntax_error('"/" must follow a format selector', start)
1985                         first_choice = current_selector
1986                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1987                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1988                     elif string == '[':
1989                         if not current_selector:
1990                             current_selector = FormatSelector(SINGLE, 'best', [])
1991                         format_filter = _parse_filter(tokens)
1992                         current_selector.filters.append(format_filter)
1993                     elif string == '(':
1994                         if current_selector:
1995                             raise syntax_error('Unexpected "("', start)
1996                         group = _parse_format_selection(tokens, inside_group=True)
1997                         current_selector = FormatSelector(GROUP, group, [])
1998                     elif string == '+':
1999                         if not current_selector:
2000                             raise syntax_error('Unexpected "+"', start)
2001                         selector_1 = current_selector
2002                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2003                         if not selector_2:
2004                             raise syntax_error('Expected a selector', start)
2005                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2006                     else:
2007                         raise syntax_error(f'Operator not recognized: "{string}"', start)
2008                 elif type == tokenize.ENDMARKER:
2009                     break
2010             if current_selector:
2011                 selectors.append(current_selector)
2012             return selectors
2013
2014         def _merge(formats_pair):
2015             format_1, format_2 = formats_pair
2016
2017             formats_info = []
2018             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2019             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2020
2021             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2022                 get_no_more = {'video': False, 'audio': False}
2023                 for (i, fmt_info) in enumerate(formats_info):
2024                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2025                         formats_info.pop(i)
2026                         continue
2027                     for aud_vid in ['audio', 'video']:
2028                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2029                             if get_no_more[aud_vid]:
2030                                 formats_info.pop(i)
2031                                 break
2032                             get_no_more[aud_vid] = True
2033
2034             if len(formats_info) == 1:
2035                 return formats_info[0]
2036
2037             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2038             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2039
2040             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2041             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2042
2043             output_ext = self.params.get('merge_output_format')
2044             if not output_ext:
2045                 if the_only_video:
2046                     output_ext = the_only_video['ext']
2047                 elif the_only_audio and not video_fmts:
2048                     output_ext = the_only_audio['ext']
2049                 else:
2050                     output_ext = 'mkv'
2051
2052             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2053
2054             new_dict = {
2055                 'requested_formats': formats_info,
2056                 'format': '+'.join(filtered('format')),
2057                 'format_id': '+'.join(filtered('format_id')),
2058                 'ext': output_ext,
2059                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2060                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2061                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2062                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2063                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2064             }
2065
2066             if the_only_video:
2067                 new_dict.update({
2068                     'width': the_only_video.get('width'),
2069                     'height': the_only_video.get('height'),
2070                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2071                     'fps': the_only_video.get('fps'),
2072                     'dynamic_range': the_only_video.get('dynamic_range'),
2073                     'vcodec': the_only_video.get('vcodec'),
2074                     'vbr': the_only_video.get('vbr'),
2075                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2076                 })
2077
2078             if the_only_audio:
2079                 new_dict.update({
2080                     'acodec': the_only_audio.get('acodec'),
2081                     'abr': the_only_audio.get('abr'),
2082                     'asr': the_only_audio.get('asr'),
2083                 })
2084
2085             return new_dict
2086
2087         def _check_formats(formats):
2088             if not check_formats:
2089                 yield from formats
2090                 return
2091             yield from self._check_formats(formats)
2092
2093         def _build_selector_function(selector):
2094             if isinstance(selector, list):  # ,
2095                 fs = [_build_selector_function(s) for s in selector]
2096
2097                 def selector_function(ctx):
2098                     for f in fs:
2099                         yield from f(ctx)
2100                 return selector_function
2101
2102             elif selector.type == GROUP:  # ()
2103                 selector_function = _build_selector_function(selector.selector)
2104
2105             elif selector.type == PICKFIRST:  # /
2106                 fs = [_build_selector_function(s) for s in selector.selector]
2107
2108                 def selector_function(ctx):
2109                     for f in fs:
2110                         picked_formats = list(f(ctx))
2111                         if picked_formats:
2112                             return picked_formats
2113                     return []
2114
2115             elif selector.type == MERGE:  # +
2116                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2117
2118                 def selector_function(ctx):
2119                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2120                         yield _merge(pair)
2121
2122             elif selector.type == SINGLE:  # atom
2123                 format_spec = selector.selector or 'best'
2124
2125                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2126                 if format_spec == 'all':
2127                     def selector_function(ctx):
2128                         yield from _check_formats(ctx['formats'][::-1])
2129                 elif format_spec == 'mergeall':
2130                     def selector_function(ctx):
2131                         formats = list(_check_formats(
2132                             f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2133                         if not formats:
2134                             return
2135                         merged_format = formats[-1]
2136                         for f in formats[-2::-1]:
2137                             merged_format = _merge((merged_format, f))
2138                         yield merged_format
2139
2140                 else:
2141                     format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
2142                     mobj = re.match(
2143                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2144                         format_spec)
2145                     if mobj is not None:
2146                         format_idx = int_or_none(mobj.group('n'), default=1)
2147                         format_reverse = mobj.group('bw')[0] == 'b'
2148                         format_type = (mobj.group('type') or [None])[0]
2149                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2150                         format_modified = mobj.group('mod') is not None
2151
2152                         format_fallback = not format_type and not format_modified  # for b, w
2153                         _filter_f = (
2154                             (lambda f: f.get('%scodec' % format_type) != 'none')
2155                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2156                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2157                             if format_type  # bv, ba, wv, wa
2158                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2159                             if not format_modified  # b, w
2160                             else lambda f: True)  # b*, w*
2161                         filter_f = lambda f: _filter_f(f) and (
2162                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2163                     else:
2164                         if format_spec in self._format_selection_exts['audio']:
2165                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2166                         elif format_spec in self._format_selection_exts['video']:
2167                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2168                             seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2169                         elif format_spec in self._format_selection_exts['storyboards']:
2170                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2171                         else:
2172                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2173
2174                     def selector_function(ctx):
2175                         formats = list(ctx['formats'])
2176                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2177                         if not matches:
2178                             if format_fallback and ctx['incomplete_formats']:
2179                                 # for extractors with incomplete formats (audio only (soundcloud)
2180                                 # or video only (imgur)) best/worst will fallback to
2181                                 # best/worst {video,audio}-only format
2182                                 matches = formats
2183                             elif seperate_fallback and not ctx['has_merged_format']:
2184                                 # for compatibility with youtube-dl when there is no pre-merged format
2185                                 matches = list(filter(seperate_fallback, formats))
2186                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2187                         try:
2188                             yield matches[format_idx - 1]
2189                         except LazyList.IndexError:
2190                             return
2191
2192             filters = [self._build_format_filter(f) for f in selector.filters]
2193
2194             def final_selector(ctx):
2195                 ctx_copy = dict(ctx)
2196                 for _filter in filters:
2197                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2198                 return selector_function(ctx_copy)
2199             return final_selector
2200
2201         stream = io.BytesIO(format_spec.encode())
2202         try:
2203             tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2204         except tokenize.TokenError:
2205             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2206
2207         class TokenIterator:
2208             def __init__(self, tokens):
2209                 self.tokens = tokens
2210                 self.counter = 0
2211
2212             def __iter__(self):
2213                 return self
2214
2215             def __next__(self):
2216                 if self.counter >= len(self.tokens):
2217                     raise StopIteration()
2218                 value = self.tokens[self.counter]
2219                 self.counter += 1
2220                 return value
2221
2222             next = __next__
2223
2224             def restore_last_token(self):
2225                 self.counter -= 1
2226
2227         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2228         return _build_selector_function(parsed_selector)
2229
2230     def _calc_headers(self, info_dict):
2231         res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2232
2233         cookies = self._calc_cookies(info_dict['url'])
2234         if cookies:
2235             res['Cookie'] = cookies
2236
2237         if 'X-Forwarded-For' not in res:
2238             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2239             if x_forwarded_for_ip:
2240                 res['X-Forwarded-For'] = x_forwarded_for_ip
2241
2242         return res
2243
2244     def _calc_cookies(self, url):
2245         pr = sanitized_Request(url)
2246         self.cookiejar.add_cookie_header(pr)
2247         return pr.get_header('Cookie')
2248
2249     def _sort_thumbnails(self, thumbnails):
2250         thumbnails.sort(key=lambda t: (
2251             t.get('preference') if t.get('preference') is not None else -1,
2252             t.get('width') if t.get('width') is not None else -1,
2253             t.get('height') if t.get('height') is not None else -1,
2254             t.get('id') if t.get('id') is not None else '',
2255             t.get('url')))
2256
2257     def _sanitize_thumbnails(self, info_dict):
2258         thumbnails = info_dict.get('thumbnails')
2259         if thumbnails is None:
2260             thumbnail = info_dict.get('thumbnail')
2261             if thumbnail:
2262                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2263         if not thumbnails:
2264             return
2265
2266         def check_thumbnails(thumbnails):
2267             for t in thumbnails:
2268                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2269                 try:
2270                     self.urlopen(HEADRequest(t['url']))
2271                 except network_exceptions as err:
2272                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2273                     continue
2274                 yield t
2275
2276         self._sort_thumbnails(thumbnails)
2277         for i, t in enumerate(thumbnails):
2278             if t.get('id') is None:
2279                 t['id'] = '%d' % i
2280             if t.get('width') and t.get('height'):
2281                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2282             t['url'] = sanitize_url(t['url'])
2283
2284         if self.params.get('check_formats') is True:
2285             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2286         else:
2287             info_dict['thumbnails'] = thumbnails
2288
2289     def _fill_common_fields(self, info_dict, is_video=True):
2290         # TODO: move sanitization here
2291         if is_video:
2292             # playlists are allowed to lack "title"
2293             title = info_dict.get('title', NO_DEFAULT)
2294             if title is NO_DEFAULT:
2295                 raise ExtractorError('Missing "title" field in extractor result',
2296                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2297             info_dict['fulltitle'] = title
2298             if not title:
2299                 if title == '':
2300                     self.write_debug('Extractor gave empty title. Creating a generic title')
2301                 else:
2302                     self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2303                 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2304
2305         if info_dict.get('duration') is not None:
2306             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2307
2308         for ts_key, date_key in (
2309                 ('timestamp', 'upload_date'),
2310                 ('release_timestamp', 'release_date'),
2311                 ('modified_timestamp', 'modified_date'),
2312         ):
2313             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2314                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2315                 # see http://bugs.python.org/issue1646728)
2316                 with contextlib.suppress(ValueError, OverflowError, OSError):
2317                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2318                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2319
2320         live_keys = ('is_live', 'was_live')
2321         live_status = info_dict.get('live_status')
2322         if live_status is None:
2323             for key in live_keys:
2324                 if info_dict.get(key) is False:
2325                     continue
2326                 if info_dict.get(key):
2327                     live_status = key
2328                 break
2329             if all(info_dict.get(key) is False for key in live_keys):
2330                 live_status = 'not_live'
2331         if live_status:
2332             info_dict['live_status'] = live_status
2333             for key in live_keys:
2334                 if info_dict.get(key) is None:
2335                     info_dict[key] = (live_status == key)
2336
2337         # Auto generate title fields corresponding to the *_number fields when missing
2338         # in order to always have clean titles. This is very common for TV series.
2339         for field in ('chapter', 'season', 'episode'):
2340             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2341                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2342
2343     def _raise_pending_errors(self, info):
2344         err = info.pop('__pending_error', None)
2345         if err:
2346             self.report_error(err, tb=False)
2347
2348     def process_video_result(self, info_dict, download=True):
2349         assert info_dict.get('_type', 'video') == 'video'
2350         self._num_videos += 1
2351
2352         if 'id' not in info_dict:
2353             raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2354         elif not info_dict.get('id'):
2355             raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2356
2357         def report_force_conversion(field, field_not, conversion):
2358             self.report_warning(
2359                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2360                 % (field, field_not, conversion))
2361
2362         def sanitize_string_field(info, string_field):
2363             field = info.get(string_field)
2364             if field is None or isinstance(field, str):
2365                 return
2366             report_force_conversion(string_field, 'a string', 'string')
2367             info[string_field] = str(field)
2368
2369         def sanitize_numeric_fields(info):
2370             for numeric_field in self._NUMERIC_FIELDS:
2371                 field = info.get(numeric_field)
2372                 if field is None or isinstance(field, (int, float)):
2373                     continue
2374                 report_force_conversion(numeric_field, 'numeric', 'int')
2375                 info[numeric_field] = int_or_none(field)
2376
2377         sanitize_string_field(info_dict, 'id')
2378         sanitize_numeric_fields(info_dict)
2379         if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2380             info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2381         if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2382             self.report_warning('"duration" field is negative, there is an error in extractor')
2383
2384         chapters = info_dict.get('chapters') or []
2385         if chapters and chapters[0].get('start_time'):
2386             chapters.insert(0, {'start_time': 0})
2387
2388         dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2389         for idx, (prev, current, next_) in enumerate(zip(
2390                 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2391             if current.get('start_time') is None:
2392                 current['start_time'] = prev.get('end_time')
2393             if not current.get('end_time'):
2394                 current['end_time'] = next_.get('start_time')
2395             if not current.get('title'):
2396                 current['title'] = f'<Untitled Chapter {idx}>'
2397
2398         if 'playlist' not in info_dict:
2399             # It isn't part of a playlist
2400             info_dict['playlist'] = None
2401             info_dict['playlist_index'] = None
2402
2403         self._sanitize_thumbnails(info_dict)
2404
2405         thumbnail = info_dict.get('thumbnail')
2406         thumbnails = info_dict.get('thumbnails')
2407         if thumbnail:
2408             info_dict['thumbnail'] = sanitize_url(thumbnail)
2409         elif thumbnails:
2410             info_dict['thumbnail'] = thumbnails[-1]['url']
2411
2412         if info_dict.get('display_id') is None and 'id' in info_dict:
2413             info_dict['display_id'] = info_dict['id']
2414
2415         self._fill_common_fields(info_dict)
2416
2417         for cc_kind in ('subtitles', 'automatic_captions'):
2418             cc = info_dict.get(cc_kind)
2419             if cc:
2420                 for _, subtitle in cc.items():
2421                     for subtitle_format in subtitle:
2422                         if subtitle_format.get('url'):
2423                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2424                         if subtitle_format.get('ext') is None:
2425                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2426
2427         automatic_captions = info_dict.get('automatic_captions')
2428         subtitles = info_dict.get('subtitles')
2429
2430         info_dict['requested_subtitles'] = self.process_subtitles(
2431             info_dict['id'], subtitles, automatic_captions)
2432
2433         if info_dict.get('formats') is None:
2434             # There's only one format available
2435             formats = [info_dict]
2436         else:
2437             formats = info_dict['formats']
2438
2439         # or None ensures --clean-infojson removes it
2440         info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
2441         if not self.params.get('allow_unplayable_formats'):
2442             formats = [f for f in formats if not f.get('has_drm')]
2443             if info_dict['_has_drm'] and all(
2444                     f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2445                 self.report_warning(
2446                     'This video is DRM protected and only images are available for download. '
2447                     'Use --list-formats to see them')
2448
2449         get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2450         if not get_from_start:
2451             info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2452         if info_dict.get('is_live') and formats:
2453             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2454             if get_from_start and not formats:
2455                 self.raise_no_formats(info_dict, msg=(
2456                     '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2457                     'If you want to download from the current time, use --no-live-from-start'))
2458
2459         if not formats:
2460             self.raise_no_formats(info_dict)
2461
2462         def is_wellformed(f):
2463             url = f.get('url')
2464             if not url:
2465                 self.report_warning(
2466                     '"url" field is missing or empty - skipping format, '
2467                     'there is an error in extractor')
2468                 return False
2469             if isinstance(url, bytes):
2470                 sanitize_string_field(f, 'url')
2471             return True
2472
2473         # Filter out malformed formats for better extraction robustness
2474         formats = list(filter(is_wellformed, formats))
2475
2476         formats_dict = {}
2477
2478         # We check that all the formats have the format and format_id fields
2479         for i, format in enumerate(formats):
2480             sanitize_string_field(format, 'format_id')
2481             sanitize_numeric_fields(format)
2482             format['url'] = sanitize_url(format['url'])
2483             if not format.get('format_id'):
2484                 format['format_id'] = str(i)
2485             else:
2486                 # Sanitize format_id from characters used in format selector expression
2487                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2488             format_id = format['format_id']
2489             if format_id not in formats_dict:
2490                 formats_dict[format_id] = []
2491             formats_dict[format_id].append(format)
2492
2493         # Make sure all formats have unique format_id
2494         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2495         for format_id, ambiguous_formats in formats_dict.items():
2496             ambigious_id = len(ambiguous_formats) > 1
2497             for i, format in enumerate(ambiguous_formats):
2498                 if ambigious_id:
2499                     format['format_id'] = '%s-%d' % (format_id, i)
2500                 if format.get('ext') is None:
2501                     format['ext'] = determine_ext(format['url']).lower()
2502                 # Ensure there is no conflict between id and ext in format selection
2503                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2504                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2505                     format['format_id'] = 'f%s' % format['format_id']
2506
2507         for i, format in enumerate(formats):
2508             if format.get('format') is None:
2509                 format['format'] = '{id} - {res}{note}'.format(
2510                     id=format['format_id'],
2511                     res=self.format_resolution(format),
2512                     note=format_field(format, 'format_note', ' (%s)'),
2513                 )
2514             if format.get('protocol') is None:
2515                 format['protocol'] = determine_protocol(format)
2516             if format.get('resolution') is None:
2517                 format['resolution'] = self.format_resolution(format, default=None)
2518             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2519                 format['dynamic_range'] = 'SDR'
2520             if (info_dict.get('duration') and format.get('tbr')
2521                     and not format.get('filesize') and not format.get('filesize_approx')):
2522                 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
2523
2524             # Add HTTP headers, so that external programs can use them from the
2525             # json output
2526             full_format_info = info_dict.copy()
2527             full_format_info.update(format)
2528             format['http_headers'] = self._calc_headers(full_format_info)
2529         # Remove private housekeeping stuff
2530         if '__x_forwarded_for_ip' in info_dict:
2531             del info_dict['__x_forwarded_for_ip']
2532
2533         if self.params.get('check_formats') is True:
2534             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2535
2536         if not formats or formats[0] is not info_dict:
2537             # only set the 'formats' fields if the original info_dict list them
2538             # otherwise we end up with a circular reference, the first (and unique)
2539             # element in the 'formats' field in info_dict is info_dict itself,
2540             # which can't be exported to json
2541             info_dict['formats'] = formats
2542
2543         info_dict, _ = self.pre_process(info_dict)
2544
2545         if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2546             return info_dict
2547
2548         self.post_extract(info_dict)
2549         info_dict, _ = self.pre_process(info_dict, 'after_filter')
2550
2551         # The pre-processors may have modified the formats
2552         formats = info_dict.get('formats', [info_dict])
2553
2554         list_only = self.params.get('simulate') is None and (
2555             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2556         interactive_format_selection = not list_only and self.format_selector == '-'
2557         if self.params.get('list_thumbnails'):
2558             self.list_thumbnails(info_dict)
2559         if self.params.get('listsubtitles'):
2560             if 'automatic_captions' in info_dict:
2561                 self.list_subtitles(
2562                     info_dict['id'], automatic_captions, 'automatic captions')
2563             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2564         if self.params.get('listformats') or interactive_format_selection:
2565             self.list_formats(info_dict)
2566         if list_only:
2567             # Without this printing, -F --print-json will not work
2568             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2569             return info_dict
2570
2571         format_selector = self.format_selector
2572         if format_selector is None:
2573             req_format = self._default_format_spec(info_dict, download=download)
2574             self.write_debug('Default format spec: %s' % req_format)
2575             format_selector = self.build_format_selector(req_format)
2576
2577         while True:
2578             if interactive_format_selection:
2579                 req_format = input(
2580                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2581                 try:
2582                     format_selector = self.build_format_selector(req_format)
2583                 except SyntaxError as err:
2584                     self.report_error(err, tb=False, is_error=False)
2585                     continue
2586
2587             formats_to_download = list(format_selector({
2588                 'formats': formats,
2589                 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2590                 'incomplete_formats': (
2591                     # All formats are video-only or
2592                     all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2593                     # all formats are audio-only
2594                     or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2595             }))
2596             if interactive_format_selection and not formats_to_download:
2597                 self.report_error('Requested format is not available', tb=False, is_error=False)
2598                 continue
2599             break
2600
2601         if not formats_to_download:
2602             if not self.params.get('ignore_no_formats_error'):
2603                 raise ExtractorError(
2604                     'Requested format is not available. Use --list-formats for a list of available formats',
2605                     expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2606             self.report_warning('Requested format is not available')
2607             # Process what we can, even without any available formats.
2608             formats_to_download = [{}]
2609
2610         requested_ranges = self.params.get('download_ranges')
2611         if requested_ranges:
2612             requested_ranges = tuple(requested_ranges(info_dict, self))
2613
2614         best_format, downloaded_formats = formats_to_download[-1], []
2615         if download:
2616             if best_format:
2617                 def to_screen(*msg):
2618                     self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2619
2620                 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2621                           (f['format_id'] for f in formats_to_download))
2622                 if requested_ranges:
2623                     to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2624                               (f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))
2625             max_downloads_reached = False
2626
2627             for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):
2628                 new_info = self._copy_infodict(info_dict)
2629                 new_info.update(fmt)
2630                 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2631                 if chapter or offset:
2632                     new_info.update({
2633                         'section_start': offset + chapter.get('start_time', 0),
2634                         'section_end': offset + min(chapter.get('end_time', duration), duration),
2635                         'section_title': chapter.get('title'),
2636                         'section_number': chapter.get('index'),
2637                     })
2638                 downloaded_formats.append(new_info)
2639                 try:
2640                     self.process_info(new_info)
2641                 except MaxDownloadsReached:
2642                     max_downloads_reached = True
2643                 self._raise_pending_errors(new_info)
2644                 # Remove copied info
2645                 for key, val in tuple(new_info.items()):
2646                     if info_dict.get(key) == val:
2647                         new_info.pop(key)
2648                 if max_downloads_reached:
2649                     break
2650
2651             write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2652             assert write_archive.issubset({True, False, 'ignore'})
2653             if True in write_archive and False not in write_archive:
2654                 self.record_download_archive(info_dict)
2655
2656             info_dict['requested_downloads'] = downloaded_formats
2657             info_dict = self.run_all_pps('after_video', info_dict)
2658             if max_downloads_reached:
2659                 raise MaxDownloadsReached()
2660
2661         # We update the info dict with the selected best quality format (backwards compatibility)
2662         info_dict.update(best_format)
2663         return info_dict
2664
2665     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2666         """Select the requested subtitles and their format"""
2667         available_subs, normal_sub_langs = {}, []
2668         if normal_subtitles and self.params.get('writesubtitles'):
2669             available_subs.update(normal_subtitles)
2670             normal_sub_langs = tuple(normal_subtitles.keys())
2671         if automatic_captions and self.params.get('writeautomaticsub'):
2672             for lang, cap_info in automatic_captions.items():
2673                 if lang not in available_subs:
2674                     available_subs[lang] = cap_info
2675
2676         if (not self.params.get('writesubtitles') and not
2677                 self.params.get('writeautomaticsub') or not
2678                 available_subs):
2679             return None
2680
2681         all_sub_langs = tuple(available_subs.keys())
2682         if self.params.get('allsubtitles', False):
2683             requested_langs = all_sub_langs
2684         elif self.params.get('subtitleslangs', False):
2685             # A list is used so that the order of languages will be the same as
2686             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2687             requested_langs = []
2688             for lang_re in self.params.get('subtitleslangs'):
2689                 discard = lang_re[0] == '-'
2690                 if discard:
2691                     lang_re = lang_re[1:]
2692                 if lang_re == 'all':
2693                     if discard:
2694                         requested_langs = []
2695                     else:
2696                         requested_langs.extend(all_sub_langs)
2697                     continue
2698                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2699                 if discard:
2700                     for lang in current_langs:
2701                         while lang in requested_langs:
2702                             requested_langs.remove(lang)
2703                 else:
2704                     requested_langs.extend(current_langs)
2705             requested_langs = orderedSet(requested_langs)
2706         elif normal_sub_langs:
2707             requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
2708         else:
2709             requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
2710         if requested_langs:
2711             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2712
2713         formats_query = self.params.get('subtitlesformat', 'best')
2714         formats_preference = formats_query.split('/') if formats_query else []
2715         subs = {}
2716         for lang in requested_langs:
2717             formats = available_subs.get(lang)
2718             if formats is None:
2719                 self.report_warning(f'{lang} subtitles not available for {video_id}')
2720                 continue
2721             for ext in formats_preference:
2722                 if ext == 'best':
2723                     f = formats[-1]
2724                     break
2725                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2726                 if matches:
2727                     f = matches[-1]
2728                     break
2729             else:
2730                 f = formats[-1]
2731                 self.report_warning(
2732                     'No subtitle format found matching "%s" for language %s, '
2733                     'using %s' % (formats_query, lang, f['ext']))
2734             subs[lang] = f
2735         return subs
2736
2737     def _forceprint(self, key, info_dict):
2738         if info_dict is None:
2739             return
2740         info_copy = info_dict.copy()
2741         info_copy['formats_table'] = self.render_formats_table(info_dict)
2742         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2743         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2744         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2745
2746         def format_tmpl(tmpl):
2747             mobj = re.match(r'\w+(=?)$', tmpl)
2748             if mobj and mobj.group(1):
2749                 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2750             elif mobj:
2751                 return f'%({tmpl})s'
2752             return tmpl
2753
2754         for tmpl in self.params['forceprint'].get(key, []):
2755             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2756
2757         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2758             filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
2759             tmpl = format_tmpl(tmpl)
2760             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2761             if self._ensure_dir_exists(filename):
2762                 with open(filename, 'a', encoding='utf-8') as f:
2763                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2764
2765     def __forced_printings(self, info_dict, filename, incomplete):
2766         def print_mandatory(field, actual_field=None):
2767             if actual_field is None:
2768                 actual_field = field
2769             if (self.params.get('force%s' % field, False)
2770                     and (not incomplete or info_dict.get(actual_field) is not None)):
2771                 self.to_stdout(info_dict[actual_field])
2772
2773         def print_optional(field):
2774             if (self.params.get('force%s' % field, False)
2775                     and info_dict.get(field) is not None):
2776                 self.to_stdout(info_dict[field])
2777
2778         info_dict = info_dict.copy()
2779         if filename is not None:
2780             info_dict['filename'] = filename
2781         if info_dict.get('requested_formats') is not None:
2782             # For RTMP URLs, also include the playpath
2783             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2784         elif info_dict.get('url'):
2785             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2786
2787         if (self.params.get('forcejson')
2788                 or self.params['forceprint'].get('video')
2789                 or self.params['print_to_file'].get('video')):
2790             self.post_extract(info_dict)
2791         self._forceprint('video', info_dict)
2792
2793         print_mandatory('title')
2794         print_mandatory('id')
2795         print_mandatory('url', 'urls')
2796         print_optional('thumbnail')
2797         print_optional('description')
2798         print_optional('filename')
2799         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2800             self.to_stdout(formatSeconds(info_dict['duration']))
2801         print_mandatory('format')
2802
2803         if self.params.get('forcejson'):
2804             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2805
2806     def dl(self, name, info, subtitle=False, test=False):
2807         if not info.get('url'):
2808             self.raise_no_formats(info, True)
2809
2810         if test:
2811             verbose = self.params.get('verbose')
2812             params = {
2813                 'test': True,
2814                 'quiet': self.params.get('quiet') or not verbose,
2815                 'verbose': verbose,
2816                 'noprogress': not verbose,
2817                 'nopart': True,
2818                 'skip_unavailable_fragments': False,
2819                 'keep_fragments': False,
2820                 'overwrites': True,
2821                 '_no_ytdl_file': True,
2822             }
2823         else:
2824             params = self.params
2825         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2826         if not test:
2827             for ph in self._progress_hooks:
2828                 fd.add_progress_hook(ph)
2829             urls = '", "'.join(
2830                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2831                 for f in info.get('requested_formats', []) or [info])
2832             self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
2833
2834         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2835         # But it may contain objects that are not deep-copyable
2836         new_info = self._copy_infodict(info)
2837         if new_info.get('http_headers') is None:
2838             new_info['http_headers'] = self._calc_headers(new_info)
2839         return fd.download(name, new_info, subtitle)
2840
2841     def existing_file(self, filepaths, *, default_overwrite=True):
2842         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2843         if existing_files and not self.params.get('overwrites', default_overwrite):
2844             return existing_files[0]
2845
2846         for file in existing_files:
2847             self.report_file_delete(file)
2848             os.remove(file)
2849         return None
2850
2851     def process_info(self, info_dict):
2852         """Process a single resolved IE result. (Modifies it in-place)"""
2853
2854         assert info_dict.get('_type', 'video') == 'video'
2855         original_infodict = info_dict
2856
2857         if 'format' not in info_dict and 'ext' in info_dict:
2858             info_dict['format'] = info_dict['ext']
2859
2860         # This is mostly just for backward compatibility of process_info
2861         # As a side-effect, this allows for format-specific filters
2862         if self._match_entry(info_dict) is not None:
2863             info_dict['__write_download_archive'] = 'ignore'
2864             return
2865
2866         # Does nothing under normal operation - for backward compatibility of process_info
2867         self.post_extract(info_dict)
2868         self._num_downloads += 1
2869
2870         # info_dict['_filename'] needs to be set for backward compatibility
2871         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2872         temp_filename = self.prepare_filename(info_dict, 'temp')
2873         files_to_move = {}
2874
2875         # Forced printings
2876         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2877
2878         def check_max_downloads():
2879             if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
2880                 raise MaxDownloadsReached()
2881
2882         if self.params.get('simulate'):
2883             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2884             check_max_downloads()
2885             return
2886
2887         if full_filename is None:
2888             return
2889         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2890             return
2891         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2892             return
2893
2894         if self._write_description('video', info_dict,
2895                                    self.prepare_filename(info_dict, 'description')) is None:
2896             return
2897
2898         sub_files = self._write_subtitles(info_dict, temp_filename)
2899         if sub_files is None:
2900             return
2901         files_to_move.update(dict(sub_files))
2902
2903         thumb_files = self._write_thumbnails(
2904             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2905         if thumb_files is None:
2906             return
2907         files_to_move.update(dict(thumb_files))
2908
2909         infofn = self.prepare_filename(info_dict, 'infojson')
2910         _infojson_written = self._write_info_json('video', info_dict, infofn)
2911         if _infojson_written:
2912             info_dict['infojson_filename'] = infofn
2913             # For backward compatibility, even though it was a private field
2914             info_dict['__infojson_filename'] = infofn
2915         elif _infojson_written is None:
2916             return
2917
2918         # Note: Annotations are deprecated
2919         annofn = None
2920         if self.params.get('writeannotations', False):
2921             annofn = self.prepare_filename(info_dict, 'annotation')
2922         if annofn:
2923             if not self._ensure_dir_exists(encodeFilename(annofn)):
2924                 return
2925             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2926                 self.to_screen('[info] Video annotations are already present')
2927             elif not info_dict.get('annotations'):
2928                 self.report_warning('There are no annotations to write.')
2929             else:
2930                 try:
2931                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2932                     with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2933                         annofile.write(info_dict['annotations'])
2934                 except (KeyError, TypeError):
2935                     self.report_warning('There are no annotations to write.')
2936                 except OSError:
2937                     self.report_error('Cannot write annotations file: ' + annofn)
2938                     return
2939
2940         # Write internet shortcut files
2941         def _write_link_file(link_type):
2942             url = try_get(info_dict['webpage_url'], iri_to_uri)
2943             if not url:
2944                 self.report_warning(
2945                     f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2946                 return True
2947             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2948             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2949                 return False
2950             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2951                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2952                 return True
2953             try:
2954                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2955                 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2956                           newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2957                     template_vars = {'url': url}
2958                     if link_type == 'desktop':
2959                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2960                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2961             except OSError:
2962                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2963                 return False
2964             return True
2965
2966         write_links = {
2967             'url': self.params.get('writeurllink'),
2968             'webloc': self.params.get('writewebloclink'),
2969             'desktop': self.params.get('writedesktoplink'),
2970         }
2971         if self.params.get('writelink'):
2972             link_type = ('webloc' if sys.platform == 'darwin'
2973                          else 'desktop' if sys.platform.startswith('linux')
2974                          else 'url')
2975             write_links[link_type] = True
2976
2977         if any(should_write and not _write_link_file(link_type)
2978                for link_type, should_write in write_links.items()):
2979             return
2980
2981         def replace_info_dict(new_info):
2982             nonlocal info_dict
2983             if new_info == info_dict:
2984                 return
2985             info_dict.clear()
2986             info_dict.update(new_info)
2987
2988         new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2989         replace_info_dict(new_info)
2990
2991         if self.params.get('skip_download'):
2992             info_dict['filepath'] = temp_filename
2993             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2994             info_dict['__files_to_move'] = files_to_move
2995             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2996             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2997         else:
2998             # Download
2999             info_dict.setdefault('__postprocessors', [])
3000             try:
3001
3002                 def existing_video_file(*filepaths):
3003                     ext = info_dict.get('ext')
3004                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3005                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3006                                               default_overwrite=False)
3007                     if file:
3008                         info_dict['ext'] = os.path.splitext(file)[1][1:]
3009                     return file
3010
3011                 fd, success = None, True
3012                 if info_dict.get('protocol') or info_dict.get('url'):
3013                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3014                     if fd is not FFmpegFD and (
3015                             info_dict.get('section_start') or info_dict.get('section_end')):
3016                         msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3017                                else 'You have requested downloading the video partially, but ffmpeg is not installed')
3018                         self.report_error(f'{msg}. Aborting')
3019                         return
3020
3021                 if info_dict.get('requested_formats') is not None:
3022
3023                     def compatible_formats(formats):
3024                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3025                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
3026                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
3027                         if len(video_formats) > 2 or len(audio_formats) > 2:
3028                             return False
3029
3030                         # Check extension
3031                         exts = {format.get('ext') for format in formats}
3032                         COMPATIBLE_EXTS = (
3033                             {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
3034                             {'webm'},
3035                         )
3036                         for ext_sets in COMPATIBLE_EXTS:
3037                             if ext_sets.issuperset(exts):
3038                                 return True
3039                         # TODO: Check acodec/vcodec
3040                         return False
3041
3042                     requested_formats = info_dict['requested_formats']
3043                     old_ext = info_dict['ext']
3044                     if self.params.get('merge_output_format') is None:
3045                         if not compatible_formats(requested_formats):
3046                             info_dict['ext'] = 'mkv'
3047                             self.report_warning(
3048                                 'Requested formats are incompatible for merge and will be merged into mkv')
3049                         if (info_dict['ext'] == 'webm'
3050                                 and info_dict.get('thumbnails')
3051                                 # check with type instead of pp_key, __name__, or isinstance
3052                                 # since we dont want any custom PPs to trigger this
3053                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
3054                             info_dict['ext'] = 'mkv'
3055                             self.report_warning(
3056                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3057                     new_ext = info_dict['ext']
3058
3059                     def correct_ext(filename, ext=new_ext):
3060                         if filename == '-':
3061                             return filename
3062                         filename_real_ext = os.path.splitext(filename)[1][1:]
3063                         filename_wo_ext = (
3064                             os.path.splitext(filename)[0]
3065                             if filename_real_ext in (old_ext, new_ext)
3066                             else filename)
3067                         return f'{filename_wo_ext}.{ext}'
3068
3069                     # Ensure filename always has a correct extension for successful merge
3070                     full_filename = correct_ext(full_filename)
3071                     temp_filename = correct_ext(temp_filename)
3072                     dl_filename = existing_video_file(full_filename, temp_filename)
3073                     info_dict['__real_download'] = False
3074
3075                     merger = FFmpegMergerPP(self)
3076                     downloaded = []
3077                     if dl_filename is not None:
3078                         self.report_file_already_downloaded(dl_filename)
3079                     elif fd:
3080                         for f in requested_formats if fd != FFmpegFD else []:
3081                             f['filepath'] = fname = prepend_extension(
3082                                 correct_ext(temp_filename, info_dict['ext']),
3083                                 'f%s' % f['format_id'], info_dict['ext'])
3084                             downloaded.append(fname)
3085                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3086                         success, real_download = self.dl(temp_filename, info_dict)
3087                         info_dict['__real_download'] = real_download
3088                     else:
3089                         if self.params.get('allow_unplayable_formats'):
3090                             self.report_warning(
3091                                 'You have requested merging of multiple formats '
3092                                 'while also allowing unplayable formats to be downloaded. '
3093                                 'The formats won\'t be merged to prevent data corruption.')
3094                         elif not merger.available:
3095                             msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3096                             if not self.params.get('ignoreerrors'):
3097                                 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3098                                 return
3099                             self.report_warning(f'{msg}. The formats won\'t be merged')
3100
3101                         if temp_filename == '-':
3102                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3103                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3104                                       else 'but ffmpeg is not installed')
3105                             self.report_warning(
3106                                 f'You have requested downloading multiple formats to stdout {reason}. '
3107                                 'The formats will be streamed one after the other')
3108                             fname = temp_filename
3109                         for f in requested_formats:
3110                             new_info = dict(info_dict)
3111                             del new_info['requested_formats']
3112                             new_info.update(f)
3113                             if temp_filename != '-':
3114                                 fname = prepend_extension(
3115                                     correct_ext(temp_filename, new_info['ext']),
3116                                     'f%s' % f['format_id'], new_info['ext'])
3117                                 if not self._ensure_dir_exists(fname):
3118                                     return
3119                                 f['filepath'] = fname
3120                                 downloaded.append(fname)
3121                             partial_success, real_download = self.dl(fname, new_info)
3122                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3123                             success = success and partial_success
3124
3125                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3126                         info_dict['__postprocessors'].append(merger)
3127                         info_dict['__files_to_merge'] = downloaded
3128                         # Even if there were no downloads, it is being merged only now
3129                         info_dict['__real_download'] = True
3130                     else:
3131                         for file in downloaded:
3132                             files_to_move[file] = None
3133                 else:
3134                     # Just a single file
3135                     dl_filename = existing_video_file(full_filename, temp_filename)
3136                     if dl_filename is None or dl_filename == temp_filename:
3137                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3138                         # So we should try to resume the download
3139                         success, real_download = self.dl(temp_filename, info_dict)
3140                         info_dict['__real_download'] = real_download
3141                     else:
3142                         self.report_file_already_downloaded(dl_filename)
3143
3144                 dl_filename = dl_filename or temp_filename
3145                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3146
3147             except network_exceptions as err:
3148                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3149                 return
3150             except OSError as err:
3151                 raise UnavailableVideoError(err)
3152             except (ContentTooShortError, ) as err:
3153                 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3154                 return
3155
3156             self._raise_pending_errors(info_dict)
3157             if success and full_filename != '-':
3158
3159                 def fixup():
3160                     do_fixup = True
3161                     fixup_policy = self.params.get('fixup')
3162                     vid = info_dict['id']
3163
3164                     if fixup_policy in ('ignore', 'never'):
3165                         return
3166                     elif fixup_policy == 'warn':
3167                         do_fixup = 'warn'
3168                     elif fixup_policy != 'force':
3169                         assert fixup_policy in ('detect_or_warn', None)
3170                         if not info_dict.get('__real_download'):
3171                             do_fixup = False
3172
3173                     def ffmpeg_fixup(cndn, msg, cls):
3174                         if not (do_fixup and cndn):
3175                             return
3176                         elif do_fixup == 'warn':
3177                             self.report_warning(f'{vid}: {msg}')
3178                             return
3179                         pp = cls(self)
3180                         if pp.available:
3181                             info_dict['__postprocessors'].append(pp)
3182                         else:
3183                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3184
3185                     stretched_ratio = info_dict.get('stretched_ratio')
3186                     ffmpeg_fixup(stretched_ratio not in (1, None),
3187                                  f'Non-uniform pixel ratio {stretched_ratio}',
3188                                  FFmpegFixupStretchedPP)
3189
3190                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3191                     downloader = downloader.FD_NAME if downloader else None
3192
3193                     ext = info_dict.get('ext')
3194                     postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3195                         isinstance(pp, FFmpegVideoConvertorPP)
3196                         and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3197                     ) for pp in self._pps['post_process'])
3198
3199                     if not postprocessed_by_ffmpeg:
3200                         ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
3201                                      'writing DASH m4a. Only some players support this container',
3202                                      FFmpegFixupM4aPP)
3203                         ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3204                                      or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3205                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3206                                      FFmpegFixupM3u8PP)
3207                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3208                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3209
3210                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3211                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3212
3213                 fixup()
3214                 try:
3215                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3216                 except PostProcessingError as err:
3217                     self.report_error('Postprocessing: %s' % str(err))
3218                     return
3219                 try:
3220                     for ph in self._post_hooks:
3221                         ph(info_dict['filepath'])
3222                 except Exception as err:
3223                     self.report_error('post hooks: %s' % str(err))
3224                     return
3225                 info_dict['__write_download_archive'] = True
3226
3227         assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
3228         if self.params.get('force_write_download_archive'):
3229             info_dict['__write_download_archive'] = True
3230         check_max_downloads()
3231
3232     def __download_wrapper(self, func):
3233         @functools.wraps(func)
3234         def wrapper(*args, **kwargs):
3235             try:
3236                 res = func(*args, **kwargs)
3237             except UnavailableVideoError as e:
3238                 self.report_error(e)
3239             except DownloadCancelled as e:
3240                 self.to_screen(f'[info] {e}')
3241                 if not self.params.get('break_per_url'):
3242                     raise
3243             else:
3244                 if self.params.get('dump_single_json', False):
3245                     self.post_extract(res)
3246                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3247         return wrapper
3248
3249     def download(self, url_list):
3250         """Download a given list of URLs."""
3251         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3252         outtmpl = self.params['outtmpl']['default']
3253         if (len(url_list) > 1
3254                 and outtmpl != '-'
3255                 and '%' not in outtmpl
3256                 and self.params.get('max_downloads') != 1):
3257             raise SameFileError(outtmpl)
3258
3259         for url in url_list:
3260             self.__download_wrapper(self.extract_info)(
3261                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3262
3263         return self._download_retcode
3264
3265     def download_with_info_file(self, info_filename):
3266         with contextlib.closing(fileinput.FileInput(
3267                 [info_filename], mode='r',
3268                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3269             # FileInput doesn't have a read method, we can't call json.load
3270             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3271         try:
3272             self.__download_wrapper(self.process_ie_result)(info, download=True)
3273         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3274             if not isinstance(e, EntryNotInPlaylist):
3275                 self.to_stderr('\r')
3276             webpage_url = info.get('webpage_url')
3277             if webpage_url is not None:
3278                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3279                 return self.download([webpage_url])
3280             else:
3281                 raise
3282         return self._download_retcode
3283
3284     @staticmethod
3285     def sanitize_info(info_dict, remove_private_keys=False):
3286         ''' Sanitize the infodict for converting to json '''
3287         if info_dict is None:
3288             return info_dict
3289         info_dict.setdefault('epoch', int(time.time()))
3290         info_dict.setdefault('_type', 'video')
3291
3292         if remove_private_keys:
3293             reject = lambda k, v: v is None or k.startswith('__') or k in {
3294                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3295                 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
3296             }
3297         else:
3298             reject = lambda k, v: False
3299
3300         def filter_fn(obj):
3301             if isinstance(obj, dict):
3302                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3303             elif isinstance(obj, (list, tuple, set, LazyList)):
3304                 return list(map(filter_fn, obj))
3305             elif obj is None or isinstance(obj, (str, int, float, bool)):
3306                 return obj
3307             else:
3308                 return repr(obj)
3309
3310         return filter_fn(info_dict)
3311
3312     @staticmethod
3313     def filter_requested_info(info_dict, actually_filter=True):
3314         ''' Alias of sanitize_info for backward compatibility '''
3315         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3316
3317     def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3318         for filename in set(filter(None, files_to_delete)):
3319             if msg:
3320                 self.to_screen(msg % filename)
3321             try:
3322                 os.remove(filename)
3323             except OSError:
3324                 self.report_warning(f'Unable to delete file {filename}')
3325             if filename in info.get('__files_to_move', []):  # NB: Delete even if None
3326                 del info['__files_to_move'][filename]
3327
3328     @staticmethod
3329     def post_extract(info_dict):
3330         def actual_post_extract(info_dict):
3331             if info_dict.get('_type') in ('playlist', 'multi_video'):
3332                 for video_dict in info_dict.get('entries', {}):
3333                     actual_post_extract(video_dict or {})
3334                 return
3335
3336             post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3337             info_dict.update(post_extractor())
3338
3339         actual_post_extract(info_dict or {})
3340
3341     def run_pp(self, pp, infodict):
3342         files_to_delete = []
3343         if '__files_to_move' not in infodict:
3344             infodict['__files_to_move'] = {}
3345         try:
3346             files_to_delete, infodict = pp.run(infodict)
3347         except PostProcessingError as e:
3348             # Must be True and not 'only_download'
3349             if self.params.get('ignoreerrors') is True:
3350                 self.report_error(e)
3351                 return infodict
3352             raise
3353
3354         if not files_to_delete:
3355             return infodict
3356         if self.params.get('keepvideo', False):
3357             for f in files_to_delete:
3358                 infodict['__files_to_move'].setdefault(f, '')
3359         else:
3360             self._delete_downloaded_files(
3361                 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3362         return infodict
3363
3364     def run_all_pps(self, key, info, *, additional_pps=None):
3365         self._forceprint(key, info)
3366         for pp in (additional_pps or []) + self._pps[key]:
3367             info = self.run_pp(pp, info)
3368         return info
3369
3370     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3371         info = dict(ie_info)
3372         info['__files_to_move'] = files_to_move or {}
3373         try:
3374             info = self.run_all_pps(key, info)
3375         except PostProcessingError as err:
3376             msg = f'Preprocessing: {err}'
3377             info.setdefault('__pending_error', msg)
3378             self.report_error(msg, is_error=False)
3379         return info, info.pop('__files_to_move', None)
3380
3381     def post_process(self, filename, info, files_to_move=None):
3382         """Run all the postprocessors on the given file."""
3383         info['filepath'] = filename
3384         info['__files_to_move'] = files_to_move or {}
3385         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3386         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3387         del info['__files_to_move']
3388         return self.run_all_pps('after_move', info)
3389
3390     def _make_archive_id(self, info_dict):
3391         video_id = info_dict.get('id')
3392         if not video_id:
3393             return
3394         # Future-proof against any change in case
3395         # and backwards compatibility with prior versions
3396         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3397         if extractor is None:
3398             url = str_or_none(info_dict.get('url'))
3399             if not url:
3400                 return
3401             # Try to find matching extractor for the URL and take its ie_key
3402             for ie_key, ie in self._ies.items():
3403                 if ie.suitable(url):
3404                     extractor = ie_key
3405                     break
3406             else:
3407                 return
3408         return f'{extractor.lower()} {video_id}'
3409
3410     def in_download_archive(self, info_dict):
3411         fn = self.params.get('download_archive')
3412         if fn is None:
3413             return False
3414
3415         vid_id = self._make_archive_id(info_dict)
3416         if not vid_id:
3417             return False  # Incomplete video information
3418
3419         return vid_id in self.archive
3420
3421     def record_download_archive(self, info_dict):
3422         fn = self.params.get('download_archive')
3423         if fn is None:
3424             return
3425         vid_id = self._make_archive_id(info_dict)
3426         assert vid_id
3427         self.write_debug(f'Adding to archive: {vid_id}')
3428         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3429             archive_file.write(vid_id + '\n')
3430         self.archive.add(vid_id)
3431
3432     @staticmethod
3433     def format_resolution(format, default='unknown'):
3434         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3435             return 'audio only'
3436         if format.get('resolution') is not None:
3437             return format['resolution']
3438         if format.get('width') and format.get('height'):
3439             return '%dx%d' % (format['width'], format['height'])
3440         elif format.get('height'):
3441             return '%sp' % format['height']
3442         elif format.get('width'):
3443             return '%dx?' % format['width']
3444         return default
3445
3446     def _list_format_headers(self, *headers):
3447         if self.params.get('listformats_table', True) is not False:
3448             return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3449         return headers
3450
3451     def _format_note(self, fdict):
3452         res = ''
3453         if fdict.get('ext') in ['f4f', 'f4m']:
3454             res += '(unsupported)'
3455         if fdict.get('language'):
3456             if res:
3457                 res += ' '
3458             res += '[%s]' % fdict['language']
3459         if fdict.get('format_note') is not None:
3460             if res:
3461                 res += ' '
3462             res += fdict['format_note']
3463         if fdict.get('tbr') is not None:
3464             if res:
3465                 res += ', '
3466             res += '%4dk' % fdict['tbr']
3467         if fdict.get('container') is not None:
3468             if res:
3469                 res += ', '
3470             res += '%s container' % fdict['container']
3471         if (fdict.get('vcodec') is not None
3472                 and fdict.get('vcodec') != 'none'):
3473             if res:
3474                 res += ', '
3475             res += fdict['vcodec']
3476             if fdict.get('vbr') is not None:
3477                 res += '@'
3478         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3479             res += 'video@'
3480         if fdict.get('vbr') is not None:
3481             res += '%4dk' % fdict['vbr']
3482         if fdict.get('fps') is not None:
3483             if res:
3484                 res += ', '
3485             res += '%sfps' % fdict['fps']
3486         if fdict.get('acodec') is not None:
3487             if res:
3488                 res += ', '
3489             if fdict['acodec'] == 'none':
3490                 res += 'video only'
3491             else:
3492                 res += '%-5s' % fdict['acodec']
3493         elif fdict.get('abr') is not None:
3494             if res:
3495                 res += ', '
3496             res += 'audio'
3497         if fdict.get('abr') is not None:
3498             res += '@%3dk' % fdict['abr']
3499         if fdict.get('asr') is not None:
3500             res += ' (%5dHz)' % fdict['asr']
3501         if fdict.get('filesize') is not None:
3502             if res:
3503                 res += ', '
3504             res += format_bytes(fdict['filesize'])
3505         elif fdict.get('filesize_approx') is not None:
3506             if res:
3507                 res += ', '
3508             res += '~' + format_bytes(fdict['filesize_approx'])
3509         return res
3510
3511     def render_formats_table(self, info_dict):
3512         if not info_dict.get('formats') and not info_dict.get('url'):
3513             return None
3514
3515         formats = info_dict.get('formats', [info_dict])
3516         if not self.params.get('listformats_table', True) is not False:
3517             table = [
3518                 [
3519                     format_field(f, 'format_id'),
3520                     format_field(f, 'ext'),
3521                     self.format_resolution(f),
3522                     self._format_note(f)
3523                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3524             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3525
3526         delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3527         table = [
3528             [
3529                 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3530                 format_field(f, 'ext'),
3531                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3532                 format_field(f, 'fps', '\t%d'),
3533                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3534                 delim,
3535                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3536                 format_field(f, 'tbr', '\t%dk'),
3537                 shorten_protocol_name(f.get('protocol', '')),
3538                 delim,
3539                 format_field(f, 'vcodec', default='unknown').replace(
3540                     'none', 'images' if f.get('acodec') == 'none'
3541                             else self._format_out('audio only', self.Styles.SUPPRESS)),
3542                 format_field(f, 'vbr', '\t%dk'),
3543                 format_field(f, 'acodec', default='unknown').replace(
3544                     'none', '' if f.get('vcodec') == 'none'
3545                             else self._format_out('video only', self.Styles.SUPPRESS)),
3546                 format_field(f, 'abr', '\t%dk'),
3547                 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3548                 join_nonempty(
3549                     self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3550                     format_field(f, 'language', '[%s]'),
3551                     join_nonempty(format_field(f, 'format_note'),
3552                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3553                                   delim=', '),
3554                     delim=' '),
3555             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3556         header_line = self._list_format_headers(
3557             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3558             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3559
3560         return render_table(
3561             header_line, table, hide_empty=True,
3562             delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3563
3564     def render_thumbnails_table(self, info_dict):
3565         thumbnails = list(info_dict.get('thumbnails') or [])
3566         if not thumbnails:
3567             return None
3568         return render_table(
3569             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3570             [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3571
3572     def render_subtitles_table(self, video_id, subtitles):
3573         def _row(lang, formats):
3574             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3575             if len(set(names)) == 1:
3576                 names = [] if names[0] == 'unknown' else names[:1]
3577             return [lang, ', '.join(names), ', '.join(exts)]
3578
3579         if not subtitles:
3580             return None
3581         return render_table(
3582             self._list_format_headers('Language', 'Name', 'Formats'),
3583             [_row(lang, formats) for lang, formats in subtitles.items()],
3584             hide_empty=True)
3585
3586     def __list_table(self, video_id, name, func, *args):
3587         table = func(*args)
3588         if not table:
3589             self.to_screen(f'{video_id} has no {name}')
3590             return
3591         self.to_screen(f'[info] Available {name} for {video_id}:')
3592         self.to_stdout(table)
3593
3594     def list_formats(self, info_dict):
3595         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3596
3597     def list_thumbnails(self, info_dict):
3598         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3599
3600     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3601         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3602
3603     def urlopen(self, req):
3604         """ Start an HTTP download """
3605         if isinstance(req, str):
3606             req = sanitized_Request(req)
3607         return self._opener.open(req, timeout=self._socket_timeout)
3608
    def print_debug_header(self):
        """Emit the '[debug]' diagnostic header; does nothing unless the 'verbose' param is set

        In order, the header reports: stream encodings, yt-dlp version/variant,
        lazy-extractor status, loaded plugins, compatibility options, git HEAD
        (source checkouts only), system identifier, external executable
        versions, optional libraries and the effective proxy map.
        """
        if not self.params.get('verbose'):
            return

        # These imports can be slow. So import them only as needed
        from .extractor.extractors import _LAZY_LOADER
        from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors

        def get_encoding(stream):
            # Describe a stream's encoding; falls back to 'missing (<type name>)'
            # when the stream has no `encoding` attribute
            ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
            if not supports_terminal_sequences(stream):
                from .utils import WINDOWS_VT_MODE  # Must be imported locally
                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
            return ret

        # One entry per configured output stream, skipping unset streams and the 'console' entry
        encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            self.get_encoding(),
            ', '.join(
                f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
                if stream is not None and key != 'console')
        )

        # With a user-supplied logger every line goes through logger.debug().
        # Otherwise the encoding line is written with write_string(..., encoding=None)
        # and all later lines go through self._write_string
        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        write_debug(join_nonempty(
            'yt-dlp version', __version__,
            f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            delim=' '))
        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if plugin_extractors or plugin_postprocessors:
            # A plugin is shown as 'Class as name' when it is registered under a different name
            write_debug('Plugins: %s' % [
                '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
        if self.params['compat_opts']:
            write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

        if source == 'source':
            # Best effort: ask git (run from this file's directory) for the short
            # HEAD hash and report it only if the output is purely hexadecimal
            try:
                stdout, _, _ = Popen.run(
                    ['git', 'rev-parse', '--short', 'HEAD'],
                    text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                if re.fullmatch('[0-9a-f]+', stdout.strip()):
                    write_debug(f'Git HEAD: {stdout.strip()}')
            except Exception:
                # NOTE(review): sys.exc_clear() is a Python 2 API; on Python 3 the
                # attribute lookup presumably fails and is swallowed by suppress(),
                # so this handler effectively just ignores the error
                with contextlib.suppress(Exception):
                    sys.exc_clear()

        write_debug(system_identifier())

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        # Keep only the features whose value is truthy and append them to the ffmpeg version
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        # Executables with a falsy version are left out; 'none' if nothing remains
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .compat.compat_utils import get_package_info
        from .dependencies import available_dependencies

        # The set comprehension de-duplicates identical package descriptions before sorting
        write_debug('Optional libraries: %s' % (', '.join(sorted({
            join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
        })) or 'none'))

        # Make sure the opener exists, then merge the `proxies` mapping of every
        # handler that exposes one
        self._setup_opener()
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        # Not implemented
        # (the `False and` guard means this branch never executes)
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode()
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3710
3711     def _setup_opener(self):
3712         if hasattr(self, '_opener'):
3713             return
3714         timeout_val = self.params.get('socket_timeout')
3715         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3716
3717         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3718         opts_cookiefile = self.params.get('cookiefile')
3719         opts_proxy = self.params.get('proxy')
3720
3721         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3722
3723         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3724         if opts_proxy is not None:
3725             if opts_proxy == '':
3726                 proxies = {}
3727             else:
3728                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3729         else:
3730             proxies = urllib.request.getproxies()
3731             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3732             if 'http' in proxies and 'https' not in proxies:
3733                 proxies['https'] = proxies['http']
3734         proxy_handler = PerRequestProxyHandler(proxies)
3735
3736         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3737         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3738         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3739         redirect_handler = YoutubeDLRedirectHandler()
3740         data_handler = urllib.request.DataHandler()
3741
3742         # When passing our own FileHandler instance, build_opener won't add the
3743         # default FileHandler and allows us to disable the file protocol, which
3744         # can be used for malicious purposes (see
3745         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3746         file_handler = urllib.request.FileHandler()
3747
3748         def file_open(*args, **kwargs):
3749             raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3750         file_handler.file_open = file_open
3751
3752         opener = urllib.request.build_opener(
3753             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3754
3755         # Delete the default user-agent header, which would otherwise apply in
3756         # cases where our custom HTTP handler doesn't come into play
3757         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3758         opener.addheaders = []
3759         self._opener = opener
3760
3761     def encode(self, s):
3762         if isinstance(s, bytes):
3763             return s  # Already encoded
3764
3765         try:
3766             return s.encode(self.get_encoding())
3767         except UnicodeEncodeError as err:
3768             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3769             raise
3770
3771     def get_encoding(self):
3772         encoding = self.params.get('encoding')
3773         if encoding is None:
3774             encoding = preferredencoding()
3775         return encoding
3776
3777     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3778         ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
3779         if overwrite is None:
3780             overwrite = self.params.get('overwrites', True)
3781         if not self.params.get('writeinfojson'):
3782             return False
3783         elif not infofn:
3784             self.write_debug(f'Skipping writing {label} infojson')
3785             return False
3786         elif not self._ensure_dir_exists(infofn):
3787             return None
3788         elif not overwrite and os.path.exists(infofn):
3789             self.to_screen(f'[info] {label.title()} metadata is already present')
3790             return 'exists'
3791
3792         self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3793         try:
3794             write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3795             return True
3796         except OSError:
3797             self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3798             return None
3799
3800     def _write_description(self, label, ie_result, descfn):
3801         ''' Write description and returns True = written, False = skip, None = error '''
3802         if not self.params.get('writedescription'):
3803             return False
3804         elif not descfn:
3805             self.write_debug(f'Skipping writing {label} description')
3806             return False
3807         elif not self._ensure_dir_exists(descfn):
3808             return None
3809         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3810             self.to_screen(f'[info] {label.title()} description is already present')
3811         elif ie_result.get('description') is None:
3812             self.report_warning(f'There\'s no {label} description to write')
3813             return False
3814         else:
3815             try:
3816                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3817                 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3818                     descfile.write(ie_result['description'])
3819             except OSError:
3820                 self.report_error(f'Cannot write {label} description file {descfn}')
3821                 return None
3822         return True
3823
3824     def _write_subtitles(self, info_dict, filename):
3825         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3826         ret = []
3827         subtitles = info_dict.get('requested_subtitles')
3828         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3829             # subtitles download errors are already managed as troubles in relevant IE
3830             # that way it will silently go on when used with unsupporting IE
3831             return ret
3832
3833         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3834         if not sub_filename_base:
3835             self.to_screen('[info] Skipping writing video subtitles')
3836             return ret
3837         for sub_lang, sub_info in subtitles.items():
3838             sub_format = sub_info['ext']
3839             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3840             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3841             existing_sub = self.existing_file((sub_filename_final, sub_filename))
3842             if existing_sub:
3843                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3844                 sub_info['filepath'] = existing_sub
3845                 ret.append((existing_sub, sub_filename_final))
3846                 continue
3847
3848             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3849             if sub_info.get('data') is not None:
3850                 try:
3851                     # Use newline='' to prevent conversion of newline characters
3852                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3853                     with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3854                         subfile.write(sub_info['data'])
3855                     sub_info['filepath'] = sub_filename
3856                     ret.append((sub_filename, sub_filename_final))
3857                     continue
3858                 except OSError:
3859                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3860                     return None
3861
3862             try:
3863                 sub_copy = sub_info.copy()
3864                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3865                 self.dl(sub_filename, sub_copy, subtitle=True)
3866                 sub_info['filepath'] = sub_filename
3867                 ret.append((sub_filename, sub_filename_final))
3868             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3869                 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
3870                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3871                     if not self.params.get('ignoreerrors'):
3872                         self.report_error(msg)
3873                     raise DownloadError(msg)
3874                 self.report_warning(msg)
3875         return ret
3876
3877     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3878         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3879         write_all = self.params.get('write_all_thumbnails', False)
3880         thumbnails, ret = [], []
3881         if write_all or self.params.get('writethumbnail', False):
3882             thumbnails = info_dict.get('thumbnails') or []
3883         multiple = write_all and len(thumbnails) > 1
3884
3885         if thumb_filename_base is None:
3886             thumb_filename_base = filename
3887         if thumbnails and not thumb_filename_base:
3888             self.write_debug(f'Skipping writing {label} thumbnail')
3889             return ret
3890
3891         for idx, t in list(enumerate(thumbnails))[::-1]:
3892             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3893             thumb_display_id = f'{label} thumbnail {t["id"]}'
3894             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3895             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3896
3897             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3898             if existing_thumb:
3899                 self.to_screen('[info] %s is already present' % (
3900                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3901                 t['filepath'] = existing_thumb
3902                 ret.append((existing_thumb, thumb_filename_final))
3903             else:
3904                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3905                 try:
3906                     uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3907                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3908                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3909                         shutil.copyfileobj(uf, thumbf)
3910                     ret.append((thumb_filename, thumb_filename_final))
3911                     t['filepath'] = thumb_filename
3912                 except network_exceptions as err:
3913                     thumbnails.pop(idx)
3914                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3915             if ret and not write_all:
3916                 break
3917         return ret