#!/usr/bin/env python3
import collections
import contextlib
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import platform
import random
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata
import urllib.request
from string import ascii_letters

from .cache import Cache
from .compat import (
    HAS_LEGACY as compat_has_legacy,
    compat_get_terminal_size,
    compat_os_name,
    compat_shlex_quote,
    compat_str,
    compat_urllib_error,
    compat_urllib_request,
)
from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .update import detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    LINK_TEMPLATES,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    GeoRestrictedError,
    HEADRequest,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PerRequestProxyHandler,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    age_restricted,
    args_to_str,
    date_from_str,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    expand_path,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_domain,
    int_or_none,
    iri_to_uri,
    join_nonempty,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    merge_headers,
    network_exceptions,
    number_of_digits,
    orderedSet,
    parse_filesize,
    platform_name,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .version import RELEASE_GIT_HEAD, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task the InfoExtractors handle),
    it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".

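    A minimal usage sketch (illustrative only; the URL and the format
    selector are placeholders, not recommendations):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'format': 'bestvideo+bestaudio/best'}) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=example'])
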
    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are 'video' or any of
                       the items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
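                       Eg (an illustrative value; the keys used are entries
                       of utils.POSTPROCESS_WHEN and the templates are
                       placeholders):
                       {'before_dl': ['%(title)s'], 'after_move': ['%(filepath)s']}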
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats      Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
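                       An illustrative paths/outtmpl pair (all values are
                       placeholders):
                       'paths': {'home': '~/Videos', 'temp': '/tmp/yt-dlp'},
                       'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'}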
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc.
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages. Eg: ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, and the name of the
                       keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input URL is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                               the entries of utils.POSTPROCESS_WHEN
                               Assumed to be 'post_process' if not given
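                       A sketch of a single entry (FFmpegExtractAudio and its
                       'preferredcodec' argument exist, but treat the exact
                       values as placeholders):
                       {'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'}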
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading" or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
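                       A minimal hook sketch:

                           def my_hook(d):
                               if d['status'] == 'finished':
                                   print('Download finished; post-processing next')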
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                                 Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: Extension to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       match_filter_func in utils.py is one example for this.
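                       A minimal filter sketch (the one-hour cutoff is an
                       arbitrary example):

                           def skip_long(info_dict, *, incomplete):
                               duration = info_dict.get('duration')
                               if duration and duration > 3600:
                                   return 'Longer than an hour'  # skipped
                               return None  # downloaded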
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
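                       Eg (illustrative): {'m3u8': 'ffmpeg', 'default': 'aria2c'}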
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
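                       Eg (an illustrative exponential backoff):
                       {'http': lambda n: 2 ** n, 'fragment': lambda n: n * 3}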
    download_ranges:   A function that gets called for every video with the signature
                       (info_dict, *, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded. Each Section contains:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
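                       A minimal callback sketch (assuming plain dicts are
                       acceptable as Sections):

                           def first_minute(info_dict, *, ydl):
                               yield {'start_time': 0, 'end_time': 60}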

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       Eg: {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _format_selection_exts = {
        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
        'video': {'mp4', 'flv', 'webm', '3gp'},
        'storyboards': {'mhtml'},
    }

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        windows_enable_vt_mode()
        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )
        self._allow_colors = Namespace(**{
            type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
            for type_, stream in self._out_files.items_ if type_ != 'console'
        })

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecation_warning(msg)

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        if not compat_has_legacy:
            self.params['compat_opts'].add('no-compat-legacy')
        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        # Set http_headers defaults according to std_headers
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        self._setup_opener()
        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

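        # NB: each archive line is assumed to have the form
        # '<extractor key, lowercased> <video id>', eg: 'youtube dQw4w9WgXcQ'
        # (an illustrative entry matching what in_download_archive checks)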
        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def _get_info_extractor_class(self, ie_key):
        ie = self._ies.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)
            self.add_info_extractor(ie)
        return ie

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies list; if there is no instance, it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'DeprecationWarning: {message}')
        else:
            self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = lambda x: x
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')
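
        # Illustrative parses of the mini-language above (assumed examples, not exhaustive):
        # '%(duration>%H-%M-%S)s'          -> fields='duration', strf_format='%H-%M-%S'
        # '%(playlist_index|0)s'           -> fields='playlist_index', default='0'
        # '%(chapters&has chapters|none)s' -> replacement='has chapters', default='none'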

        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = f'0{field_size_compat_map[key]:d}d'

            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict

    def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
        assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
        if outtmpl is None:
            outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
        try:
            outtmpl = self._outtmpl_expandpath(outtmpl)
            filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
            if not filename:
                return None

            if tmpl_type in ('', 'temp'):
                final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
                if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                    filename = replace_extension(filename, ext, final_ext)
            elif tmpl_type:
                force_ext = OUTTMPL_TYPES[tmpl_type]
                if force_ext:
                    filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                no_ext, *ext = filename.rsplit('.', 2)
                filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
        """Generate the output filename"""
        if outtmpl:
            assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
            dir_type = None
        filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
1290 elif os.path.isabs(filename):
1291 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1292 if filename == '-' or not filename:
1293 return filename
1294
1295 return self.get_output_path(dir_type, filename)
1296
1297 def _match_entry(self, info_dict, incomplete=False, silent=False):
1298 """ Returns None if the file should be downloaded """
1299
1300 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1301
1302 def check_filter():
1303 if 'title' in info_dict:
1304 # This can happen when we're just evaluating the playlist
1305 title = info_dict['title']
1306 matchtitle = self.params.get('matchtitle', False)
1307 if matchtitle:
1308 if not re.search(matchtitle, title, re.IGNORECASE):
1309 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1310 rejecttitle = self.params.get('rejecttitle', False)
1311 if rejecttitle:
1312 if re.search(rejecttitle, title, re.IGNORECASE):
1313 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1314 date = info_dict.get('upload_date')
1315 if date is not None:
1316 dateRange = self.params.get('daterange', DateRange())
1317 if date not in dateRange:
1318 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1319 view_count = info_dict.get('view_count')
1320 if view_count is not None:
1321 min_views = self.params.get('min_views')
1322 if min_views is not None and view_count < min_views:
1323 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1324 max_views = self.params.get('max_views')
1325 if max_views is not None and view_count > max_views:
1326 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1327 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1328 return 'Skipping "%s" because it is age restricted' % video_title
1329
1330 match_filter = self.params.get('match_filter')
1331 if match_filter is not None:
1332 try:
1333 ret = match_filter(info_dict, incomplete=incomplete)
1334 except TypeError:
1335 # For backward compatibility
1336 ret = None if incomplete else match_filter(info_dict)
1337 if ret is NO_DEFAULT:
1338 while True:
1339 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1340 reply = input(self._format_screen(
1341 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1342 if reply in {'y', ''}:
1343 return None
1344 elif reply == 'n':
1345 return f'Skipping {video_title}'
1346 elif ret is not None:
1347 return ret
1348 return None
1349
1350 if self.in_download_archive(info_dict):
1351 reason = '%s has already been recorded in the archive' % video_title
1352 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1353 else:
1354 reason = check_filter()
1355 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1356 if reason is not None:
1357 if not silent:
1358 self.to_screen('[download] ' + reason)
1359 if self.params.get(break_opt, False):
1360 raise break_err()
1361 return reason
1362
1363 @staticmethod
1364 def add_extra_info(info_dict, extra_info):
1365 '''Set the keys from extra_info in info dict if they are missing'''
1366 for key, value in extra_info.items():
1367 info_dict.setdefault(key, value)
1368
1369 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1370 process=True, force_generic_extractor=False):
1371 """
1372 Return a list with a dictionary for each video extracted.
1373
1374 Arguments:
1375 url -- URL to extract
1376
1377 Keyword arguments:
1378 download -- whether to download videos during extraction
1379 ie_key -- extractor key hint
1380 extra_info -- dictionary containing the extra values to add to each result
1381 process -- whether to resolve all unresolved references (URLs, playlist items),
1382 must be True for download to work.
1383 force_generic_extractor -- force using the generic extractor
1384 """
1385
1386 if extra_info is None:
1387 extra_info = {}
1388
1389 if not ie_key and force_generic_extractor:
1390 ie_key = 'Generic'
1391
1392 if ie_key:
1393 ies = {ie_key: self._get_info_extractor_class(ie_key)}
1394 else:
1395 ies = self._ies
1396
1397 for ie_key, ie in ies.items():
1398 if not ie.suitable(url):
1399 continue
1400
1401 if not ie.working():
1402 self.report_warning('Support for this site has been marked as broken, '
1403 'and will probably not work.')
1404
1405 temp_id = ie.get_temp_id(url)
1406 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1407 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1408 if self.params.get('break_on_existing', False):
1409 raise ExistingVideoReached()
1410 break
1411 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1412 else:
1413 self.report_error('no suitable InfoExtractor for URL %s' % url)
1414
1415 def _handle_extraction_exceptions(func):
1416 @functools.wraps(func)
1417 def wrapper(self, *args, **kwargs):
1418 while True:
1419 try:
1420 return func(self, *args, **kwargs)
1421 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1422 raise
1423 except ReExtractInfo as e:
1424 if e.expected:
1425 self.to_screen(f'{e}; Re-extracting data')
1426 else:
1427 self.to_stderr('\r')
1428 self.report_warning(f'{e}; Re-extracting data')
1429 continue
1430 except GeoRestrictedError as e:
1431 msg = e.msg
1432 if e.countries:
1433 msg += '\nThis video is available in %s.' % ', '.join(
1434 map(ISO3166Utils.short2full, e.countries))
1435 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1436 self.report_error(msg)
1437 except ExtractorError as e: # An error we somewhat expected
1438 self.report_error(str(e), e.format_traceback())
1439 except Exception as e:
1440 if self.params.get('ignoreerrors'):
1441 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1442 else:
1443 raise
1444 break
1445 return wrapper
1446
1447 def _wait_for_video(self, ie_result):
1448 if (not self.params.get('wait_for_video')
1449 or ie_result.get('_type', 'video') != 'video'
1450 or ie_result.get('formats') or ie_result.get('url')):
1451 return
1452
1453 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
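# e.g. format_dur(3723) == '01:02:03' - timetuple_from_msec yields
# (hours, minutes, seconds, milliseconds) and the last field is dropped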
1454 last_msg = ''
1455
1456 def progress(msg):
1457 nonlocal last_msg
1458 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1459 last_msg = msg
1460
1461 min_wait, max_wait = self.params.get('wait_for_video')
1462 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1463 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1464 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1465 self.report_warning('Release time of video is not known')
1466 elif (diff or 0) <= 0:
1467 self.report_warning('Video should already be available according to extracted info')
1468 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1469 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1470
1471 wait_till = time.time() + diff
1472 try:
1473 while True:
1474 diff = wait_till - time.time()
1475 if diff <= 0:
1476 progress('')
1477 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1478 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1479 time.sleep(1)
1480 except KeyboardInterrupt:
1481 progress('')
1482 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1483 except BaseException as e:
1484 if not isinstance(e, ReExtractInfo):
1485 self.to_screen('')
1486 raise
1487
1488 @_handle_extraction_exceptions
1489 def __extract_info(self, url, ie, download, extra_info, process):
1490 ie_result = ie.extract(url)
1491 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1492 return
1493 if isinstance(ie_result, list):
1494 # Backwards compatibility: old IE result format
1495 ie_result = {
1496 '_type': 'compat_list',
1497 'entries': ie_result,
1498 }
1499 if extra_info.get('original_url'):
1500 ie_result.setdefault('original_url', extra_info['original_url'])
1501 self.add_default_extra_info(ie_result, ie, url)
1502 if process:
1503 self._wait_for_video(ie_result)
1504 return self.process_ie_result(ie_result, download, extra_info)
1505 else:
1506 return ie_result
1507
1508 def add_default_extra_info(self, ie_result, ie, url):
1509 if url is not None:
1510 self.add_extra_info(ie_result, {
1511 'webpage_url': url,
1512 'original_url': url,
1513 })
1514 webpage_url = ie_result.get('webpage_url')
1515 if webpage_url:
1516 self.add_extra_info(ie_result, {
1517 'webpage_url_basename': url_basename(webpage_url),
1518 'webpage_url_domain': get_domain(webpage_url),
1519 })
1520 if ie is not None:
1521 self.add_extra_info(ie_result, {
1522 'extractor': ie.IE_NAME,
1523 'extractor_key': ie.ie_key(),
1524 })
1525
1526 def process_ie_result(self, ie_result, download=True, extra_info=None):
1527 """
1528 Take the result of the ie (may be modified) and resolve all unresolved
1529 references (URLs, playlist items).
1530
1531 It will also download the videos if 'download' is true.
1532 Returns the resolved ie_result.
1533 """
1534 if extra_info is None:
1535 extra_info = {}
1536 result_type = ie_result.get('_type', 'video')
1537
1538 if result_type in ('url', 'url_transparent'):
1539 ie_result['url'] = sanitize_url(ie_result['url'])
1540 if ie_result.get('original_url'):
1541 extra_info.setdefault('original_url', ie_result['original_url'])
1542
1543 extract_flat = self.params.get('extract_flat', False)
1544 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1545 or extract_flat is True):
1546 info_copy = ie_result.copy()
1547 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1548 if ie and not ie_result.get('id'):
1549 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1550 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1551 self.add_extra_info(info_copy, extra_info)
1552 info_copy, _ = self.pre_process(info_copy)
1553 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1554 self._raise_pending_errors(info_copy)
1555 if self.params.get('force_write_download_archive', False):
1556 self.record_download_archive(info_copy)
1557 return ie_result
1558
1559 if result_type == 'video':
1560 self.add_extra_info(ie_result, extra_info)
1561 ie_result = self.process_video_result(ie_result, download=download)
1562 self._raise_pending_errors(ie_result)
1563 additional_urls = (ie_result or {}).get('additional_urls')
1564 if additional_urls:
1565 # TODO: Improve MetadataParserPP to allow setting a list
1566 if isinstance(additional_urls, compat_str):
1567 additional_urls = [additional_urls]
1568 self.to_screen(
1569 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1570 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1571 ie_result['additional_entries'] = [
1572 self.extract_info(
1573 url, download, extra_info=extra_info,
1574 force_generic_extractor=self.params.get('force_generic_extractor'))
1575 for url in additional_urls
1576 ]
1577 return ie_result
1578 elif result_type == 'url':
1579 # We have to add extra_info to the results because it may be
1580 # contained in a playlist
1581 return self.extract_info(
1582 ie_result['url'], download,
1583 ie_key=ie_result.get('ie_key'),
1584 extra_info=extra_info)
1585 elif result_type == 'url_transparent':
1586 # Use the information from the embedding page
1587 info = self.extract_info(
1588 ie_result['url'], ie_key=ie_result.get('ie_key'),
1589 extra_info=extra_info, download=False, process=False)
1590
1591 # extract_info may return None when ignoreerrors is enabled and
1592 # extraction failed with an error, don't crash and return early
1593 # in this case
1594 if not info:
1595 return info
1596
1597 new_result = info.copy()
1598 new_result.update(filter_dict(ie_result, lambda k, v: (
1599 v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
1600
1601 # Extracted info may not be a video result (i.e.
1602 # info.get('_type', 'video') != 'video') but rather a url or
1603 # url_transparent. In such cases, outer metadata (from ie_result)
1604 # should be propagated to the inner one (info). For this to happen,
1605 # the _type of info should be overridden with url_transparent. This
1606 # fixes the issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1607 if new_result.get('_type') == 'url':
1608 new_result['_type'] = 'url_transparent'
1609
1610 return self.process_ie_result(
1611 new_result, download=download, extra_info=extra_info)
1612 elif result_type in ('playlist', 'multi_video'):
1613 # Protect from infinite recursion due to recursively nested playlists
1614 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1615 webpage_url = ie_result['webpage_url']
1616 if webpage_url in self._playlist_urls:
1617 self.to_screen(
1618 '[download] Skipping already downloaded playlist: %s'
1619 % (ie_result.get('title') or ie_result.get('id')))
1620 return
1621
1622 self._playlist_level += 1
1623 self._playlist_urls.add(webpage_url)
1624 self._fill_common_fields(ie_result, False)
1625 self._sanitize_thumbnails(ie_result)
1626 try:
1627 return self.__process_playlist(ie_result, download)
1628 finally:
1629 self._playlist_level -= 1
1630 if not self._playlist_level:
1631 self._playlist_urls.clear()
1632 elif result_type == 'compat_list':
1633 self.report_warning(
1634 'Extractor %s returned a compat_list result. '
1635 'It needs to be updated.' % ie_result.get('extractor'))
1636
1637 def _fixup(r):
1638 self.add_extra_info(r, {
1639 'extractor': ie_result['extractor'],
1640 'webpage_url': ie_result['webpage_url'],
1641 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1642 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1643 'extractor_key': ie_result['extractor_key'],
1644 })
1645 return r
1646 ie_result['entries'] = [
1647 self.process_ie_result(_fixup(r), download, extra_info)
1648 for r in ie_result['entries']
1649 ]
1650 return ie_result
1651 else:
1652 raise Exception('Invalid result type: %s' % result_type)
1653
1654 def _ensure_dir_exists(self, path):
1655 return make_dir(path, self.report_error)
1656
1657 @staticmethod
1658 def _playlist_infodict(ie_result, **kwargs):
1659 return {
1660 **ie_result,
1661 'playlist': ie_result.get('title') or ie_result.get('id'),
1662 'playlist_id': ie_result.get('id'),
1663 'playlist_title': ie_result.get('title'),
1664 'playlist_uploader': ie_result.get('uploader'),
1665 'playlist_uploader_id': ie_result.get('uploader_id'),
1666 'playlist_index': 0,
1667 **kwargs,
1668 }
1669
1670 def __process_playlist(self, ie_result, download):
1671 """Process each entry in the playlist"""
1672 title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
1673 self.to_screen(f'[download] Downloading playlist: {title}')
1674
1675 all_entries = PlaylistEntries(self, ie_result)
1676 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1677
1678 lazy = self.params.get('lazy_playlist')
1679 if lazy:
1680 resolved_entries, n_entries = [], 'N/A'
1681 ie_result['requested_entries'], ie_result['entries'] = None, None
1682 else:
1683 entries = resolved_entries = list(entries)
1684 n_entries = len(resolved_entries)
1685 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1686 if not ie_result.get('playlist_count'):
1687 # Better to do this after potentially exhausting entries
1688 ie_result['playlist_count'] = all_entries.get_full_count()
1689
1690 _infojson_written = False
1691 write_playlist_files = self.params.get('allow_playlist_files', True)
1692 if write_playlist_files and self.params.get('list_thumbnails'):
1693 self.list_thumbnails(ie_result)
1694 if write_playlist_files and not self.params.get('simulate'):
1695 ie_copy = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1696 _infojson_written = self._write_info_json(
1697 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1698 if _infojson_written is None:
1699 return
1700 if self._write_description('playlist', ie_result,
1701 self.prepare_filename(ie_copy, 'pl_description')) is None:
1702 return
1703 # TODO: This should be passed to ThumbnailsConvertor if necessary
1704 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1705
1706 if lazy:
1707 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1708 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1709 elif self.params.get('playlistreverse'):
1710 entries.reverse()
1711 elif self.params.get('playlistrandom'):
1712 random.shuffle(entries)
1713
1714 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
1715 f'{format_field(ie_result, "playlist_count", " of %s")}')
1716
1717 failures = 0
1718 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1719 for i, (playlist_index, entry) in enumerate(entries):
1720 if lazy:
1721 resolved_entries.append((playlist_index, entry))
1722
1723 # TODO: Add auto-generated fields
1724 if self._match_entry(entry, incomplete=True) is not None:
1725 continue
1726
1727 self.to_screen('[download] Downloading video %s of %s' % (
1728 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1729
1730 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
1731 if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
1732 playlist_index = ie_result['requested_entries'][i]
1733
1734 entry_result = self.__process_iterable_entry(entry, download, {
1735 'n_entries': int_or_none(n_entries),
1736 '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
1737 'playlist_count': ie_result.get('playlist_count'),
1738 'playlist_index': playlist_index,
1739 'playlist_autonumber': i + 1,
1740 'playlist': title,
1741 'playlist_id': ie_result.get('id'),
1742 'playlist_title': ie_result.get('title'),
1743 'playlist_uploader': ie_result.get('uploader'),
1744 'playlist_uploader_id': ie_result.get('uploader_id'),
1745 'extractor': ie_result['extractor'],
1746 'webpage_url': ie_result['webpage_url'],
1747 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1748 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1749 'extractor_key': ie_result['extractor_key'],
1750 })
1751 if not entry_result:
1752 failures += 1
1753 if failures >= max_failures:
1754 self.report_error(
1755 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
1756 break
1757 resolved_entries[i] = (playlist_index, entry_result)
1758
1759 # Update with processed data
1760 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1761
1762 # Write the updated info to json
1763 if _infojson_written is True and self._write_info_json(
1764 'updated playlist', ie_result,
1765 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1766 return
1767
1768 ie_result = self.run_all_pps('playlist', ie_result)
1769 self.to_screen(f'[download] Finished downloading playlist: {title}')
1770 return ie_result
1771
1772 @_handle_extraction_exceptions
1773 def __process_iterable_entry(self, entry, download, extra_info):
1774 return self.process_ie_result(
1775 entry, download=download, extra_info=extra_info)
1776
1777 def _build_format_filter(self, filter_spec):
1778 " Returns a function to filter the formats according to the filter_spec "
1779
1780 OPERATORS = {
1781 '<': operator.lt,
1782 '<=': operator.le,
1783 '>': operator.gt,
1784 '>=': operator.ge,
1785 '=': operator.eq,
1786 '!=': operator.ne,
1787 }
1788 operator_rex = re.compile(r'''(?x)\s*
1789 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1790 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1791 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1792 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1793 m = operator_rex.fullmatch(filter_spec)
1794 if m:
1795 try:
1796 comparison_value = int(m.group('value'))
1797 except ValueError:
1798 comparison_value = parse_filesize(m.group('value'))
1799 if comparison_value is None:
1800 comparison_value = parse_filesize(m.group('value') + 'B')
1801 if comparison_value is None:
1802 raise ValueError(
1803 'Invalid value %r in format specification %r' % (
1804 m.group('value'), filter_spec))
1805 op = OPERATORS[m.group('op')]
1806
1807 if not m:
1808 STR_OPERATORS = {
1809 '=': operator.eq,
1810 '^=': lambda attr, value: attr.startswith(value),
1811 '$=': lambda attr, value: attr.endswith(value),
1812 '*=': lambda attr, value: value in attr,
1813 '~=': lambda attr, value: value.search(attr) is not None
1814 }
1815 str_operator_rex = re.compile(r'''(?x)\s*
1816 (?P<key>[a-zA-Z0-9._-]+)\s*
1817 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1818 (?P<quote>["'])?
1819 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1820 (?(quote)(?P=quote))\s*
1821 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1822 m = str_operator_rex.fullmatch(filter_spec)
1823 if m:
1824 if m.group('op') == '~=':
1825 comparison_value = re.compile(m.group('value'))
1826 else:
1827 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1828 str_op = STR_OPERATORS[m.group('op')]
1829 if m.group('negation'):
1830 op = lambda attr, value: not str_op(attr, value)
1831 else:
1832 op = str_op
1833
1834 if not m:
1835 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1836
1837 def _filter(f):
1838 actual_value = f.get(m.group('key'))
1839 if actual_value is None:
1840 return m.group('none_inclusive')
1841 return op(actual_value, comparison_value)
1842 return _filter
1843
1844 def _check_formats(self, formats):
1845 for f in formats:
1846 self.to_screen('[info] Testing format %s' % f['format_id'])
1847 path = self.get_output_path('temp')
1848 if not self._ensure_dir_exists(f'{path}/'):
1849 continue
1850 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1851 temp_file.close()
1852 try:
1853 success, _ = self.dl(temp_file.name, f, test=True)
1854 except (DownloadError, OSError, ValueError) + network_exceptions:
1855 success = False
1856 finally:
1857 if os.path.exists(temp_file.name):
1858 try:
1859 os.remove(temp_file.name)
1860 except OSError:
1861 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1862 if success:
1863 yield f
1864 else:
1865 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1866
1867 def _default_format_spec(self, info_dict, download=True):
1868
1869 def can_merge():
1870 merger = FFmpegMergerPP(self)
1871 return merger.available and merger.can_merge()
1872
1873 prefer_best = (
1874 not self.params.get('simulate')
1875 and download
1876 and (
1877 not can_merge()
1878 or info_dict.get('is_live') and not self.params.get('live_from_start')
1879 or self.params['outtmpl']['default'] == '-'))
1880 compat = (
1881 prefer_best
1882 or self.params.get('allow_multiple_audio_streams', False)
1883 or 'format-spec' in self.params['compat_opts'])
1884
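# e.g. with a working ffmpeg and output not going to stdout, this returns
# 'bestvideo*+bestaudio/best'; when merging is not possible, it falls
# back to 'best/bestvideo+bestaudio'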
1885 return (
1886 'best/bestvideo+bestaudio' if prefer_best
1887 else 'bestvideo*+bestaudio/best' if not compat
1888 else 'bestvideo+bestaudio/best')
1889
1890 def build_format_selector(self, format_spec):
1891 def syntax_error(note, start):
1892 message = (
1893 'Invalid format specification: '
1894 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
1895 return SyntaxError(message)
1896
1897 PICKFIRST = 'PICKFIRST'
1898 MERGE = 'MERGE'
1899 SINGLE = 'SINGLE'
1900 GROUP = 'GROUP'
1901 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
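# How a spec decomposes (illustrative): 'bv*[height<=720]+ba/b' parses to
# PICKFIRST(MERGE(SINGLE 'bv*' with filter 'height<=720', SINGLE 'ba'), SINGLE 'b')
# - '/' picks the first alternative that yields formats, '+' merges
# video+audio, '[...]' attaches filters and '(...)' groups selectors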
1902
1903 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1904 'video': self.params.get('allow_multiple_video_streams', False)}
1905
1906 check_formats = self.params.get('check_formats') == 'selected'
1907
1908 def _parse_filter(tokens):
1909 filter_parts = []
1910 for type, string, start, _, _ in tokens:
1911 if type == tokenize.OP and string == ']':
1912 return ''.join(filter_parts)
1913 else:
1914 filter_parts.append(string)
1915
1916 def _remove_unused_ops(tokens):
1917 # Remove operators that we don't use and join them with the surrounding strings
1918 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1919 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1920 last_string, last_start, last_end, last_line = None, None, None, None
1921 for type, string, start, end, line in tokens:
1922 if type == tokenize.OP and string == '[':
1923 if last_string:
1924 yield tokenize.NAME, last_string, last_start, last_end, last_line
1925 last_string = None
1926 yield type, string, start, end, line
1927 # everything inside brackets will be handled by _parse_filter
1928 for type, string, start, end, line in tokens:
1929 yield type, string, start, end, line
1930 if type == tokenize.OP and string == ']':
1931 break
1932 elif type == tokenize.OP and string in ALLOWED_OPS:
1933 if last_string:
1934 yield tokenize.NAME, last_string, last_start, last_end, last_line
1935 last_string = None
1936 yield type, string, start, end, line
1937 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1938 if not last_string:
1939 last_string = string
1940 last_start = start
1941 last_end = end
1942 else:
1943 last_string += string
1944 if last_string:
1945 yield tokenize.NAME, last_string, last_start, last_end, last_line
1946
1947 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1948 selectors = []
1949 current_selector = None
1950 for type, string, start, _, _ in tokens:
1951 # ENCODING is only defined in python 3.x
1952 if type == getattr(tokenize, 'ENCODING', None):
1953 continue
1954 elif type in [tokenize.NAME, tokenize.NUMBER]:
1955 current_selector = FormatSelector(SINGLE, string, [])
1956 elif type == tokenize.OP:
1957 if string == ')':
1958 if not inside_group:
1959 # ')' will be handled by the parentheses group
1960 tokens.restore_last_token()
1961 break
1962 elif inside_merge and string in ['/', ',']:
1963 tokens.restore_last_token()
1964 break
1965 elif inside_choice and string == ',':
1966 tokens.restore_last_token()
1967 break
1968 elif string == ',':
1969 if not current_selector:
1970 raise syntax_error('"," must follow a format selector', start)
1971 selectors.append(current_selector)
1972 current_selector = None
1973 elif string == '/':
1974 if not current_selector:
1975 raise syntax_error('"/" must follow a format selector', start)
1976 first_choice = current_selector
1977 second_choice = _parse_format_selection(tokens, inside_choice=True)
1978 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1979 elif string == '[':
1980 if not current_selector:
1981 current_selector = FormatSelector(SINGLE, 'best', [])
1982 format_filter = _parse_filter(tokens)
1983 current_selector.filters.append(format_filter)
1984 elif string == '(':
1985 if current_selector:
1986 raise syntax_error('Unexpected "("', start)
1987 group = _parse_format_selection(tokens, inside_group=True)
1988 current_selector = FormatSelector(GROUP, group, [])
1989 elif string == '+':
1990 if not current_selector:
1991 raise syntax_error('Unexpected "+"', start)
1992 selector_1 = current_selector
1993 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1994 if not selector_2:
1995 raise syntax_error('Expected a selector', start)
1996 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1997 else:
1998 raise syntax_error(f'Operator not recognized: "{string}"', start)
1999 elif type == tokenize.ENDMARKER:
2000 break
2001 if current_selector:
2002 selectors.append(current_selector)
2003 return selectors
2004
2005 def _merge(formats_pair):
2006 format_1, format_2 = formats_pair
2007
2008 formats_info = []
2009 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2010 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2011
2012 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2013 get_no_more = {'video': False, 'audio': False}
2014 for (i, fmt_info) in enumerate(formats_info):
2015 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2016 formats_info.pop(i)
2017 continue
2018 for aud_vid in ['audio', 'video']:
2019 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2020 if get_no_more[aud_vid]:
2021 formats_info.pop(i)
2022 break
2023 get_no_more[aud_vid] = True
2024
2025 if len(formats_info) == 1:
2026 return formats_info[0]
2027
2028 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2029 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2030
2031 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2032 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2033
2034 output_ext = self.params.get('merge_output_format')
2035 if not output_ext:
2036 if the_only_video:
2037 output_ext = the_only_video['ext']
2038 elif the_only_audio and not video_fmts:
2039 output_ext = the_only_audio['ext']
2040 else:
2041 output_ext = 'mkv'
2042
2043 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2044
2045 new_dict = {
2046 'requested_formats': formats_info,
2047 'format': '+'.join(filtered('format')),
2048 'format_id': '+'.join(filtered('format_id')),
2049 'ext': output_ext,
2050 'protocol': '+'.join(map(determine_protocol, formats_info)),
2051 'language': '+'.join(orderedSet(filtered('language'))) or None,
2052 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2053 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2054 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2055 }
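# e.g. merging video format_id '137' (mp4) with audio '140' (m4a) - ids
# illustrative - yields format_id '137+140', protocol 'https+https' and,
# with no merge_output_format set, the video's ext 'mp4'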
2056
2057 if the_only_video:
2058 new_dict.update({
2059 'width': the_only_video.get('width'),
2060 'height': the_only_video.get('height'),
2061 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2062 'fps': the_only_video.get('fps'),
2063 'dynamic_range': the_only_video.get('dynamic_range'),
2064 'vcodec': the_only_video.get('vcodec'),
2065 'vbr': the_only_video.get('vbr'),
2066 'stretched_ratio': the_only_video.get('stretched_ratio'),
2067 })
2068
2069 if the_only_audio:
2070 new_dict.update({
2071 'acodec': the_only_audio.get('acodec'),
2072 'abr': the_only_audio.get('abr'),
2073 'asr': the_only_audio.get('asr'),
2074 })
2075
2076 return new_dict
2077
2078 def _check_formats(formats):
2079 if not check_formats:
2080 yield from formats
2081 return
2082 yield from self._check_formats(formats)
2083
2084 def _build_selector_function(selector):
2085 if isinstance(selector, list): # ,
2086 fs = [_build_selector_function(s) for s in selector]
2087
2088 def selector_function(ctx):
2089 for f in fs:
2090 yield from f(ctx)
2091 return selector_function
2092
2093 elif selector.type == GROUP: # ()
2094 selector_function = _build_selector_function(selector.selector)
2095
2096 elif selector.type == PICKFIRST: # /
2097 fs = [_build_selector_function(s) for s in selector.selector]
2098
2099 def selector_function(ctx):
2100 for f in fs:
2101 picked_formats = list(f(ctx))
2102 if picked_formats:
2103 return picked_formats
2104 return []
2105
2106 elif selector.type == MERGE: # +
2107 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2108
2109 def selector_function(ctx):
2110 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2111 yield _merge(pair)
2112
2113 elif selector.type == SINGLE: # atom
2114 format_spec = selector.selector or 'best'
2115
2116 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2117 if format_spec == 'all':
2118 def selector_function(ctx):
2119 yield from _check_formats(ctx['formats'][::-1])
2120 elif format_spec == 'mergeall':
2121 def selector_function(ctx):
2122 formats = list(_check_formats(
2123 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2124 if not formats:
2125 return
2126 merged_format = formats[-1]
2127 for f in formats[-2::-1]:
2128 merged_format = _merge((merged_format, f))
2129 yield merged_format
2130
2131 else:
2132 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2133 mobj = re.match(
2134 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2135 format_spec)
2136 if mobj is not None:
2137 format_idx = int_or_none(mobj.group('n'), default=1)
2138 format_reverse = mobj.group('bw')[0] == 'b'
2139 format_type = (mobj.group('type') or [None])[0]
2140 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2141 format_modified = mobj.group('mod') is not None
2142
2143 format_fallback = not format_type and not format_modified # for b, w
2144 _filter_f = (
2145 (lambda f: f.get('%scodec' % format_type) != 'none')
2146 if format_type and format_modified # bv*, ba*, wv*, wa*
2147 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2148 if format_type # bv, ba, wv, wa
2149 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2150 if not format_modified # b, w
2151 else lambda f: True) # b*, w*
2152 filter_f = lambda f: _filter_f(f) and (
2153 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2154 else:
2155 if format_spec in self._format_selection_exts['audio']:
2156 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2157 elif format_spec in self._format_selection_exts['video']:
2158 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2159 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2160 elif format_spec in self._format_selection_exts['storyboards']:
2161 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2162 else:
2163 filter_f = lambda f: f.get('format_id') == format_spec # id
2164
2165 def selector_function(ctx):
2166 formats = list(ctx['formats'])
2167 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2168 if not matches:
2169 if format_fallback and ctx['incomplete_formats']:
2170 # for extractors with incomplete formats (audio only (soundcloud)
2171 # or video only (imgur)) best/worst will fallback to
2172 # best/worst {video,audio}-only format
2173 matches = formats
2174 elif separate_fallback and not ctx['has_merged_format']:
2175 # for compatibility with youtube-dl when there is no pre-merged format
2176 matches = list(filter(separate_fallback, formats))
2177 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2178 try:
2179 yield matches[format_idx - 1]
2180 except LazyList.IndexError:
2181 return
2182
2183 filters = [self._build_format_filter(f) for f in selector.filters]
2184
2185 def final_selector(ctx):
2186 ctx_copy = dict(ctx)
2187 for _filter in filters:
2188 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2189 return selector_function(ctx_copy)
2190 return final_selector
2191
2192 stream = io.BytesIO(format_spec.encode())
2193 try:
2194 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2195 except tokenize.TokenError:
2196 raise syntax_error('Missing closing/opening brackets or parentheses', (0, len(format_spec)))
2197
2198 class TokenIterator:
2199 def __init__(self, tokens):
2200 self.tokens = tokens
2201 self.counter = 0
2202
2203 def __iter__(self):
2204 return self
2205
2206 def __next__(self):
2207 if self.counter >= len(self.tokens):
2208 raise StopIteration()
2209 value = self.tokens[self.counter]
2210 self.counter += 1
2211 return value
2212
2213 next = __next__
2214
2215 def restore_last_token(self):
2216 self.counter -= 1
2217
2218 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2219 return _build_selector_function(parsed_selector)
2220
2221 def _calc_headers(self, info_dict):
2222 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2223
2224 cookies = self._calc_cookies(info_dict['url'])
2225 if cookies:
2226 res['Cookie'] = cookies
2227
2228 if 'X-Forwarded-For' not in res:
2229 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2230 if x_forwarded_for_ip:
2231 res['X-Forwarded-For'] = x_forwarded_for_ip
2232
2233 return res
2234
2235 def _calc_cookies(self, url):
2236 pr = sanitized_Request(url)
2237 self.cookiejar.add_cookie_header(pr)
2238 return pr.get_header('Cookie')
2239
2240 def _sort_thumbnails(self, thumbnails):
2241 thumbnails.sort(key=lambda t: (
2242 t.get('preference') if t.get('preference') is not None else -1,
2243 t.get('width') if t.get('width') is not None else -1,
2244 t.get('height') if t.get('height') is not None else -1,
2245 t.get('id') if t.get('id') is not None else '',
2246 t.get('url')))
2247
2248 def _sanitize_thumbnails(self, info_dict):
2249 thumbnails = info_dict.get('thumbnails')
2250 if thumbnails is None:
2251 thumbnail = info_dict.get('thumbnail')
2252 if thumbnail:
2253 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2254 if not thumbnails:
2255 return
2256
2257 def check_thumbnails(thumbnails):
2258 for t in thumbnails:
2259 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2260 try:
2261 self.urlopen(HEADRequest(t['url']))
2262 except network_exceptions as err:
2263 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2264 continue
2265 yield t
2266
2267 self._sort_thumbnails(thumbnails)
2268 for i, t in enumerate(thumbnails):
2269 if t.get('id') is None:
2270 t['id'] = '%d' % i
2271 if t.get('width') and t.get('height'):
2272 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2273 t['url'] = sanitize_url(t['url'])
2274
2275 if self.params.get('check_formats') is True:
2276 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2277 else:
2278 info_dict['thumbnails'] = thumbnails
2279
2280 def _fill_common_fields(self, info_dict, is_video=True):
2281 # TODO: move sanitization here
2282 if is_video:
2283 # playlists are allowed to lack "title"
2284 title = info_dict.get('title', NO_DEFAULT)
2285 if title is NO_DEFAULT:
2286 raise ExtractorError('Missing "title" field in extractor result',
2287 video_id=info_dict['id'], ie=info_dict['extractor'])
2288 info_dict['fulltitle'] = title
2289 if not title:
2290 if title == '':
2291 self.write_debug('Extractor gave empty title. Creating a generic title')
2292 else:
2293 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2294 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2295
2296 if info_dict.get('duration') is not None:
2297 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2298
2299 for ts_key, date_key in (
2300 ('timestamp', 'upload_date'),
2301 ('release_timestamp', 'release_date'),
2302 ('modified_timestamp', 'modified_date'),
2303 ):
2304 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2305 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2306 # see http://bugs.python.org/issue1646728)
2307 with contextlib.suppress(ValueError, OverflowError, OSError):
2308 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2309 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2310
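# e.g. a 'timestamp' of 1656633600 (2022-07-01 00:00:00 UTC) fills in a
# missing 'upload_date' as '20220701'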
2311 live_keys = ('is_live', 'was_live')
2312 live_status = info_dict.get('live_status')
2313 if live_status is None:
2314 for key in live_keys:
2315 if info_dict.get(key) is False:
2316 continue
2317 if info_dict.get(key):
2318 live_status = key
2319 break
2320 if all(info_dict.get(key) is False for key in live_keys):
2321 live_status = 'not_live'
2322 if live_status:
2323 info_dict['live_status'] = live_status
2324 for key in live_keys:
2325 if info_dict.get(key) is None:
2326 info_dict[key] = (live_status == key)
2327
2328 # Auto-generate title fields corresponding to the *_number fields when missing
2329 # in order to always have clean titles. This is very common for TV series.
2330 for field in ('chapter', 'season', 'episode'):
2331 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2332 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2333
2334 def _raise_pending_errors(self, info):
2335 err = info.pop('__pending_error', None)
2336 if err:
2337 self.report_error(err, tb=False)
2338
2339 def process_video_result(self, info_dict, download=True):
2340 assert info_dict.get('_type', 'video') == 'video'
2341 self._num_videos += 1
2342
2343 if 'id' not in info_dict:
2344 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2345 elif not info_dict.get('id'):
2346 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2347
2348 def report_force_conversion(field, field_not, conversion):
2349 self.report_warning(
2350 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2351 % (field, field_not, conversion))
2352
2353 def sanitize_string_field(info, string_field):
2354 field = info.get(string_field)
2355 if field is None or isinstance(field, compat_str):
2356 return
2357 report_force_conversion(string_field, 'a string', 'string')
2358 info[string_field] = compat_str(field)
2359
2360 def sanitize_numeric_fields(info):
2361 for numeric_field in self._NUMERIC_FIELDS:
2362 field = info.get(numeric_field)
2363 if field is None or isinstance(field, (int, float)):
2364 continue
2365 report_force_conversion(numeric_field, 'numeric', 'int')
2366 info[numeric_field] = int_or_none(field)
2367
2368 sanitize_string_field(info_dict, 'id')
2369 sanitize_numeric_fields(info_dict)
2370 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2371 self.report_warning('"duration" field is negative, there is an error in extractor')
2372
2373 if 'playlist' not in info_dict:
2374 # It isn't part of a playlist
2375 info_dict['playlist'] = None
2376 info_dict['playlist_index'] = None
2377
2378 self._sanitize_thumbnails(info_dict)
2379
2380 thumbnail = info_dict.get('thumbnail')
2381 thumbnails = info_dict.get('thumbnails')
2382 if thumbnail:
2383 info_dict['thumbnail'] = sanitize_url(thumbnail)
2384 elif thumbnails:
2385 info_dict['thumbnail'] = thumbnails[-1]['url']
2386
2387 if info_dict.get('display_id') is None and 'id' in info_dict:
2388 info_dict['display_id'] = info_dict['id']
2389
2390 self._fill_common_fields(info_dict)
2391
2392 for cc_kind in ('subtitles', 'automatic_captions'):
2393 cc = info_dict.get(cc_kind)
2394 if cc:
2395 for _, subtitle in cc.items():
2396 for subtitle_format in subtitle:
2397 if subtitle_format.get('url'):
2398 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2399 if subtitle_format.get('ext') is None:
2400 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2401
2402 automatic_captions = info_dict.get('automatic_captions')
2403 subtitles = info_dict.get('subtitles')
2404
2405 info_dict['requested_subtitles'] = self.process_subtitles(
2406 info_dict['id'], subtitles, automatic_captions)
2407
2408 if info_dict.get('formats') is None:
2409 # There's only one format available
2410 formats = [info_dict]
2411 else:
2412 formats = info_dict['formats']
2413
2414 # or None ensures --clean-infojson removes it
2415 info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
2416 if not self.params.get('allow_unplayable_formats'):
2417 formats = [f for f in formats if not f.get('has_drm')]
2418 if info_dict['_has_drm'] and all(
2419 f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2420 self.report_warning(
2421 'This video is DRM protected and only images are available for download. '
2422 'Use --list-formats to see them')
2423
2424 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2425 if not get_from_start:
2426 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2427 if info_dict.get('is_live') and formats:
2428 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2429 if get_from_start and not formats:
2430 self.raise_no_formats(info_dict, msg=(
2431 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2432 'If you want to download from the current time, use --no-live-from-start'))
2433
2434 if not formats:
2435 self.raise_no_formats(info_dict)
2436
2437 def is_wellformed(f):
2438 url = f.get('url')
2439 if not url:
2440 self.report_warning(
2441 '"url" field is missing or empty - skipping format, '
2442 'there is an error in extractor')
2443 return False
2444 if isinstance(url, bytes):
2445 sanitize_string_field(f, 'url')
2446 return True
2447
2448 # Filter out malformed formats for better extraction robustness
2449 formats = list(filter(is_wellformed, formats))
2450
2451 formats_dict = {}
2452
2453 # We check that all the formats have the format and format_id fields
2454 for i, format in enumerate(formats):
2455 sanitize_string_field(format, 'format_id')
2456 sanitize_numeric_fields(format)
2457 format['url'] = sanitize_url(format['url'])
2458 if not format.get('format_id'):
2459 format['format_id'] = compat_str(i)
2460 else:
2461 # Sanitize format_id from characters used in format selector expression
2462 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2463 format_id = format['format_id']
2464 if format_id not in formats_dict:
2465 formats_dict[format_id] = []
2466 formats_dict[format_id].append(format)
2467
2468 # Make sure all formats have unique format_id
2469 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2470 for format_id, ambiguous_formats in formats_dict.items():
2471 ambiguous_id = len(ambiguous_formats) > 1
2472 for i, format in enumerate(ambiguous_formats):
2473 if ambiguous_id:
2474 format['format_id'] = '%s-%d' % (format_id, i)
2475 if format.get('ext') is None:
2476 format['ext'] = determine_ext(format['url']).lower()
2477 # Ensure there is no conflict between id and ext in format selection
2478 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2479 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2480 format['format_id'] = 'f%s' % format['format_id']
2481
2482 for i, format in enumerate(formats):
2483 if format.get('format') is None:
2484 format['format'] = '{id} - {res}{note}'.format(
2485 id=format['format_id'],
2486 res=self.format_resolution(format),
2487 note=format_field(format, 'format_note', ' (%s)'),
2488 )
2489 if format.get('protocol') is None:
2490 format['protocol'] = determine_protocol(format)
2491 if format.get('resolution') is None:
2492 format['resolution'] = self.format_resolution(format, default=None)
2493 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2494 format['dynamic_range'] = 'SDR'
2495 if (info_dict.get('duration') and format.get('tbr')
2496 and not format.get('filesize') and not format.get('filesize_approx')):
2497 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
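# rough estimate: duration (s) x tbr (KBit/s) x 128 bytes per KBit-second;
# only used when no real filesize is available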
2498
2499 # Add HTTP headers, so that external programs can use them from the
2500 # json output
2501 full_format_info = info_dict.copy()
2502 full_format_info.update(format)
2503 format['http_headers'] = self._calc_headers(full_format_info)
2504 # Remove private housekeeping stuff
2505 if '__x_forwarded_for_ip' in info_dict:
2506 del info_dict['__x_forwarded_for_ip']
2507
2508 if self.params.get('check_formats') is True:
2509 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2510
2511 if not formats or formats[0] is not info_dict:
2512 # only set the 'formats' field if the original info_dict lists them;
2513 # otherwise we would end up with a circular reference: the first (and only)
2514 # element of the 'formats' field in info_dict would be info_dict itself,
2515 # which can't be exported to json
2516 info_dict['formats'] = formats
2517
2518 info_dict, _ = self.pre_process(info_dict)
2519
2520 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2521 return info_dict
2522
2523 self.post_extract(info_dict)
2524 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2525
2526 # The pre-processors may have modified the formats
2527 formats = info_dict.get('formats', [info_dict])
2528
2529 list_only = self.params.get('simulate') is None and (
2530 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2531 interactive_format_selection = not list_only and self.format_selector == '-'
2532 if self.params.get('list_thumbnails'):
2533 self.list_thumbnails(info_dict)
2534 if self.params.get('listsubtitles'):
2535 if 'automatic_captions' in info_dict:
2536 self.list_subtitles(
2537 info_dict['id'], automatic_captions, 'automatic captions')
2538 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2539 if self.params.get('listformats') or interactive_format_selection:
2540 self.list_formats(info_dict)
2541 if list_only:
2542 # Without this printing, -F --print-json will not work
2543 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2544 return info_dict
2545
2546 format_selector = self.format_selector
2547 if format_selector is None:
2548 req_format = self._default_format_spec(info_dict, download=download)
2549 self.write_debug('Default format spec: %s' % req_format)
2550 format_selector = self.build_format_selector(req_format)
2551
2552 while True:
2553 if interactive_format_selection:
2554 req_format = input(
2555 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2556 try:
2557 format_selector = self.build_format_selector(req_format)
2558 except SyntaxError as err:
2559 self.report_error(err, tb=False, is_error=False)
2560 continue
2561
2562 formats_to_download = list(format_selector({
2563 'formats': formats,
2564 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2565 'incomplete_formats': (
2566 # All formats are video-only or
2567 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2568 # all formats are audio-only
2569 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2570 }))
2571 if interactive_format_selection and not formats_to_download:
2572 self.report_error('Requested format is not available', tb=False, is_error=False)
2573 continue
2574 break
2575
2576 if not formats_to_download:
2577 if not self.params.get('ignore_no_formats_error'):
2578 raise ExtractorError(
2579 'Requested format is not available. Use --list-formats for a list of available formats',
2580 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2581 self.report_warning('Requested format is not available')
2582 # Process what we can, even without any available formats.
2583 formats_to_download = [{}]
2584
2585 requested_ranges = self.params.get('download_ranges')
2586 if requested_ranges:
2587 requested_ranges = tuple(requested_ranges(info_dict, self))
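# 'download_ranges' is a callable (info_dict, ydl) -> iterable of
# chapter-like dicts; a minimal sketch (hypothetical values):
#   lambda info, ydl: [{'start_time': 0, 'end_time': 30, 'title': 'intro'}]
# Each range becomes the section_start/section_end/... fields below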
2588
2589 best_format, downloaded_formats = formats_to_download[-1], []
2590 if download:
2591 if best_format:
2592 def to_screen(*msg):
2593 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2594
2595 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2596 (f['format_id'] for f in formats_to_download))
2597 if requested_ranges:
2598 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2599 (f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))
2600 max_downloads_reached = False
2601
2602 for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):
2603 new_info = self._copy_infodict(info_dict)
2604 new_info.update(fmt)
2605 if chapter:
2606 new_info.update({
2607 'section_start': chapter.get('start_time'),
2608 'section_end': chapter.get('end_time', 0),
2609 'section_title': chapter.get('title'),
2610 'section_number': chapter.get('index'),
2611 })
2612 downloaded_formats.append(new_info)
2613 try:
2614 self.process_info(new_info)
2615 except MaxDownloadsReached:
2616 max_downloads_reached = True
2617 self._raise_pending_errors(new_info)
2618 # Remove copied info
2619 for key, val in tuple(new_info.items()):
2620 if info_dict.get(key) == val:
2621 new_info.pop(key)
2622 if max_downloads_reached:
2623 break
2624
2625 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2626 assert write_archive.issubset({True, False, 'ignore'})
2627 if True in write_archive and False not in write_archive:
2628 self.record_download_archive(info_dict)
2629
2630 info_dict['requested_downloads'] = downloaded_formats
2631 info_dict = self.run_all_pps('after_video', info_dict)
2632 if max_downloads_reached:
2633 raise MaxDownloadsReached()
2634
2635 # We update the info dict with the selected best quality format (backwards compatibility)
2636 info_dict.update(best_format)
2637 return info_dict
2638
2639 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2640 """Select the requested subtitles and their format"""
2641 available_subs, normal_sub_langs = {}, []
2642 if normal_subtitles and self.params.get('writesubtitles'):
2643 available_subs.update(normal_subtitles)
2644 normal_sub_langs = tuple(normal_subtitles.keys())
2645 if automatic_captions and self.params.get('writeautomaticsub'):
2646 for lang, cap_info in automatic_captions.items():
2647 if lang not in available_subs:
2648 available_subs[lang] = cap_info
2649
2650 if (not self.params.get('writesubtitles')
2651 and not self.params.get('writeautomaticsub')
2652 or not available_subs):
2653 return None
2654
2655 all_sub_langs = tuple(available_subs.keys())
2656 if self.params.get('allsubtitles', False):
2657 requested_langs = all_sub_langs
2658 elif self.params.get('subtitleslangs', False):
2659 # A list is used so that the order of languages will be the same as
2660 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2661 requested_langs = []
2662 for lang_re in self.params.get('subtitleslangs'):
2663 discard = lang_re[0] == '-'
2664 if discard:
2665 lang_re = lang_re[1:]
2666 if lang_re == 'all':
2667 if discard:
2668 requested_langs = []
2669 else:
2670 requested_langs.extend(all_sub_langs)
2671 continue
2672 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2673 if discard:
2674 for lang in current_langs:
2675 while lang in requested_langs:
2676 requested_langs.remove(lang)
2677 else:
2678 requested_langs.extend(current_langs)
2679 requested_langs = orderedSet(requested_langs)
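# e.g. subtitleslangs ['all', '-live_chat'] requests every language and
# then discards 'live_chat'; each entry is a regex matched against the
# full language code, so 'en.*' covers 'en', 'en-US', ...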
2680 elif normal_sub_langs:
2681 requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
2682 else:
2683 requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
2684 if requested_langs:
2685 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2686
2687 formats_query = self.params.get('subtitlesformat', 'best')
2688 formats_preference = formats_query.split('/') if formats_query else []
2689 subs = {}
2690 for lang in requested_langs:
2691 formats = available_subs.get(lang)
2692 if formats is None:
2693 self.report_warning(f'{lang} subtitles not available for {video_id}')
2694 continue
2695 for ext in formats_preference:
2696 if ext == 'best':
2697 f = formats[-1]
2698 break
2699 matches = list(filter(lambda f: f['ext'] == ext, formats))
2700 if matches:
2701 f = matches[-1]
2702 break
2703 else:
2704 f = formats[-1]
2705 self.report_warning(
2706 'No subtitle format found matching "%s" for language %s, '
2707 'using %s' % (formats_query, lang, f['ext']))
2708 subs[lang] = f
2709 return subs
2710
2711 def _forceprint(self, key, info_dict):
2712 if info_dict is None:
2713 return
2714 info_copy = info_dict.copy()
2715 info_copy['formats_table'] = self.render_formats_table(info_dict)
2716 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2717 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2718 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2719
2720 def format_tmpl(tmpl):
2721 mobj = re.match(r'\w+(=?)$', tmpl)
2722 if mobj and mobj.group(1):
2723 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2724 elif mobj:
2725 return f'%({tmpl})s'
2726 return tmpl
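# e.g. format_tmpl('title') -> '%(title)s' and format_tmpl('title=') ->
# 'title = %(title)r'; anything else is passed through unchanged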
2727
2728 for tmpl in self.params['forceprint'].get(key, []):
2729 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2730
2731 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2732 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
2733 tmpl = format_tmpl(tmpl)
2734 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2735 if self._ensure_dir_exists(filename):
2736 with open(filename, 'a', encoding='utf-8') as f:
2737 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2738
2739 def __forced_printings(self, info_dict, filename, incomplete):
2740 def print_mandatory(field, actual_field=None):
2741 if actual_field is None:
2742 actual_field = field
2743 if (self.params.get('force%s' % field, False)
2744 and (not incomplete or info_dict.get(actual_field) is not None)):
2745 self.to_stdout(info_dict[actual_field])
2746
2747 def print_optional(field):
2748 if (self.params.get('force%s' % field, False)
2749 and info_dict.get(field) is not None):
2750 self.to_stdout(info_dict[field])
2751
2752 info_dict = info_dict.copy()
2753 if filename is not None:
2754 info_dict['filename'] = filename
2755 if info_dict.get('requested_formats') is not None:
2756 # For RTMP URLs, also include the playpath
2757 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2758 elif info_dict.get('url'):
2759 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2760
2761 if (self.params.get('forcejson')
2762 or self.params['forceprint'].get('video')
2763 or self.params['print_to_file'].get('video')):
2764 self.post_extract(info_dict)
2765 self._forceprint('video', info_dict)
2766
2767 print_mandatory('title')
2768 print_mandatory('id')
2769 print_mandatory('url', 'urls')
2770 print_optional('thumbnail')
2771 print_optional('description')
2772 print_optional('filename')
2773 if self.params.get('forceduration') and info_dict.get('duration') is not None:
2774 self.to_stdout(formatSeconds(info_dict['duration']))
2775 print_mandatory('format')
2776
2777 if self.params.get('forcejson'):
2778 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2779
2780 def dl(self, name, info, subtitle=False, test=False):
2781 if not info.get('url'):
2782 self.raise_no_formats(info, True)
2783
2784 if test:
2785 verbose = self.params.get('verbose')
2786 params = {
2787 'test': True,
2788 'quiet': self.params.get('quiet') or not verbose,
2789 'verbose': verbose,
2790 'noprogress': not verbose,
2791 'nopart': True,
2792 'skip_unavailable_fragments': False,
2793 'keep_fragments': False,
2794 'overwrites': True,
2795 '_no_ytdl_file': True,
2796 }
2797 else:
2798 params = self.params
2799 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2800 if not test:
2801 for ph in self._progress_hooks:
2802 fd.add_progress_hook(ph)
2803 urls = '", "'.join(
2804 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2805 for f in info.get('requested_formats', []) or [info])
2806 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
2807
2808 # Note: Ideally, info should be deep-copied so that hooks cannot modify it,
2809 # but it may contain objects that are not deep-copyable
2810 new_info = self._copy_infodict(info)
2811 if new_info.get('http_headers') is None:
2812 new_info['http_headers'] = self._calc_headers(new_info)
2813 return fd.download(name, new_info, subtitle)
2814
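# For illustration: dl() is called with a prepared info dict and returns the
# downloader's (success, real_download) pair; real_download is False when an
# already-complete file was reused. A hypothetical call from process_info():
#   success, real_download = self.dl(temp_filename, info_dict)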
2815 def existing_file(self, filepaths, *, default_overwrite=True):
2816 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2817 if existing_files and not self.params.get('overwrites', default_overwrite):
2818 return existing_files[0]
2819
2820 for file in existing_files:
2821 self.report_file_delete(file)
2822 os.remove(file)
2823 return None
2824
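# For illustration (hypothetical paths): with overwrites disabled,
# existing_file() returns the first path that already exists so the download
# can be skipped; otherwise it deletes any existing candidates and returns
# None so the download proceeds:
#   self.existing_file(('video.mkv', 'video.f137.mp4'))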
2825 def process_info(self, info_dict):
2826 """Process a single resolved IE result. (Modifies it in-place)"""
2827
2828 assert info_dict.get('_type', 'video') == 'video'
2829 original_infodict = info_dict
2830
2831 if 'format' not in info_dict and 'ext' in info_dict:
2832 info_dict['format'] = info_dict['ext']
2833
2834 # This is mostly just for backward compatibility of process_info
2835 # As a side-effect, this allows for format-specific filters
2836 if self._match_entry(info_dict) is not None:
2837 info_dict['__write_download_archive'] = 'ignore'
2838 return
2839
2840 # Does nothing under normal operation - for backward compatibility of process_info
2841 self.post_extract(info_dict)
2842 self._num_downloads += 1
2843
2844 # info_dict['_filename'] needs to be set for backward compatibility
2845 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2846 temp_filename = self.prepare_filename(info_dict, 'temp')
2847 files_to_move = {}
2848
2849 # Forced printings
2850 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2851
2852 def check_max_downloads():
2853 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
2854 raise MaxDownloadsReached()
2855
2856 if self.params.get('simulate'):
2857 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2858 check_max_downloads()
2859 return
2860
2861 if full_filename is None:
2862 return
2863 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2864 return
2865 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2866 return
2867
2868 if self._write_description('video', info_dict,
2869 self.prepare_filename(info_dict, 'description')) is None:
2870 return
2871
2872 sub_files = self._write_subtitles(info_dict, temp_filename)
2873 if sub_files is None:
2874 return
2875 files_to_move.update(dict(sub_files))
2876
2877 thumb_files = self._write_thumbnails(
2878 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2879 if thumb_files is None:
2880 return
2881 files_to_move.update(dict(thumb_files))
2882
2883 infofn = self.prepare_filename(info_dict, 'infojson')
2884 _infojson_written = self._write_info_json('video', info_dict, infofn)
2885 if _infojson_written:
2886 info_dict['infojson_filename'] = infofn
2887 # For backward compatibility, even though it was a private field
2888 info_dict['__infojson_filename'] = infofn
2889 elif _infojson_written is None:
2890 return
2891
2892 # Note: Annotations are deprecated
2893 annofn = None
2894 if self.params.get('writeannotations', False):
2895 annofn = self.prepare_filename(info_dict, 'annotation')
2896 if annofn:
2897 if not self._ensure_dir_exists(encodeFilename(annofn)):
2898 return
2899 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2900 self.to_screen('[info] Video annotations are already present')
2901 elif not info_dict.get('annotations'):
2902 self.report_warning('There are no annotations to write.')
2903 else:
2904 try:
2905 self.to_screen('[info] Writing video annotations to: ' + annofn)
2906 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2907 annofile.write(info_dict['annotations'])
2908 except (KeyError, TypeError):
2909 self.report_warning('There are no annotations to write.')
2910 except OSError:
2911 self.report_error('Cannot write annotations file: ' + annofn)
2912 return
2913
2914 # Write internet shortcut files
2915 def _write_link_file(link_type):
2916 url = try_get(info_dict['webpage_url'], iri_to_uri)
2917 if not url:
2918 self.report_warning(
2919 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2920 return True
2921 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2922 if not self._ensure_dir_exists(encodeFilename(linkfn)):
2923 return False
2924 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2925 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2926 return True
2927 try:
2928 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2929 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2930 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2931 template_vars = {'url': url}
2932 if link_type == 'desktop':
2933 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2934 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2935 except OSError:
2936 self.report_error(f'Cannot write internet shortcut {linkfn}')
2937 return False
2938 return True
2939
2940 write_links = {
2941 'url': self.params.get('writeurllink'),
2942 'webloc': self.params.get('writewebloclink'),
2943 'desktop': self.params.get('writedesktoplink'),
2944 }
2945 if self.params.get('writelink'):
2946 link_type = ('webloc' if sys.platform == 'darwin'
2947 else 'desktop' if sys.platform.startswith('linux')
2948 else 'url')
2949 write_links[link_type] = True
2950
2951 if any(should_write and not _write_link_file(link_type)
2952 for link_type, should_write in write_links.items()):
2953 return
2954
2955 def replace_info_dict(new_info):
2956 nonlocal info_dict
2957 if new_info == info_dict:
2958 return
2959 info_dict.clear()
2960 info_dict.update(new_info)
2961
2962 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2963 replace_info_dict(new_info)
2964
2965 if self.params.get('skip_download'):
2966 info_dict['filepath'] = temp_filename
2967 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2968 info_dict['__files_to_move'] = files_to_move
2969 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2970 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2971 else:
2972 # Download
2973 info_dict.setdefault('__postprocessors', [])
2974 try:
2975
2976 def existing_video_file(*filepaths):
2977 ext = info_dict.get('ext')
2978 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
2979 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
2980 default_overwrite=False)
2981 if file:
2982 info_dict['ext'] = os.path.splitext(file)[1][1:]
2983 return file
2984
2985 success = True
2986 merger, fd = FFmpegMergerPP(self), None
2987 if info_dict.get('protocol') or info_dict.get('url'):
2988 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
2989 if fd is not FFmpegFD and (
2990 info_dict.get('section_start') or info_dict.get('section_end')):
2991 msg = ('This format cannot be partially downloaded' if merger.available
2992 else 'You have requested downloading the video partially, but ffmpeg is not installed')
2993 self.report_error(f'{msg}. Aborting')
2994 return
2995
2996 if info_dict.get('requested_formats') is not None:
2997
2998 def compatible_formats(formats):
2999 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3000 video_formats = [format for format in formats if format.get('vcodec') != 'none']
3001 audio_formats = [format for format in formats if format.get('acodec') != 'none']
3002 if len(video_formats) > 2 or len(audio_formats) > 2:
3003 return False
3004
3005 # Check extension
3006 exts = {format.get('ext') for format in formats}
3007 COMPATIBLE_EXTS = (
3008 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
3009 {'webm'},
3010 )
3011 for ext_sets in COMPATIBLE_EXTS:
3012 if ext_sets.issuperset(exts):
3013 return True
3014 # TODO: Check acodec/vcodec
3015 return False
3016
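# For illustration (hypothetical extensions): {'mp4', 'm4a'} falls within the
# first compatible set and merges as-is, while {'mp4', 'webm'} matches
# neither set, so the merge below falls back to an mkv container.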
3017 requested_formats = info_dict['requested_formats']
3018 old_ext = info_dict['ext']
3019 if self.params.get('merge_output_format') is None:
3020 if not compatible_formats(requested_formats):
3021 info_dict['ext'] = 'mkv'
3022 self.report_warning(
3023 'Requested formats are incompatible for merge and will be merged into mkv')
3024 if (info_dict['ext'] == 'webm'
3025 and info_dict.get('thumbnails')
3026 # check with type instead of pp_key, __name__, or isinstance
3027 # since we don't want any custom PPs to trigger this
3028 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
3029 info_dict['ext'] = 'mkv'
3030 self.report_warning(
3031 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3032 new_ext = info_dict['ext']
3033
3034 def correct_ext(filename, ext=new_ext):
3035 if filename == '-':
3036 return filename
3037 filename_real_ext = os.path.splitext(filename)[1][1:]
3038 filename_wo_ext = (
3039 os.path.splitext(filename)[0]
3040 if filename_real_ext in (old_ext, new_ext)
3041 else filename)
3042 return f'{filename_wo_ext}.{ext}'
3043
3044 # Ensure filename always has a correct extension for successful merge
3045 full_filename = correct_ext(full_filename)
3046 temp_filename = correct_ext(temp_filename)
3047 dl_filename = existing_video_file(full_filename, temp_filename)
3048 info_dict['__real_download'] = False
3049
3050 downloaded = []
3051 if dl_filename is not None:
3052 self.report_file_already_downloaded(dl_filename)
3053 elif fd:
3054 for f in requested_formats if fd != FFmpegFD else []:
3055 f['filepath'] = fname = prepend_extension(
3056 correct_ext(temp_filename, info_dict['ext']),
3057 'f%s' % f['format_id'], info_dict['ext'])
3058 downloaded.append(fname)
3059 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3060 success, real_download = self.dl(temp_filename, info_dict)
3061 info_dict['__real_download'] = real_download
3062 else:
3063 if self.params.get('allow_unplayable_formats'):
3064 self.report_warning(
3065 'You have requested merging of multiple formats '
3066 'while also allowing unplayable formats to be downloaded. '
3067 'The formats won\'t be merged to prevent data corruption.')
3068 elif not merger.available:
3069 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3070 if not self.params.get('ignoreerrors'):
3071 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3072 return
3073 self.report_warning(f'{msg}. The formats won\'t be merged')
3074
3075 if temp_filename == '-':
3076 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3077 else 'but the formats are incompatible for simultaneous download' if merger.available
3078 else 'but ffmpeg is not installed')
3079 self.report_warning(
3080 f'You have requested downloading multiple formats to stdout {reason}. '
3081 'The formats will be streamed one after the other')
3082 fname = temp_filename
3083 for f in requested_formats:
3084 new_info = dict(info_dict)
3085 del new_info['requested_formats']
3086 new_info.update(f)
3087 if temp_filename != '-':
3088 fname = prepend_extension(
3089 correct_ext(temp_filename, new_info['ext']),
3090 'f%s' % f['format_id'], new_info['ext'])
3091 if not self._ensure_dir_exists(fname):
3092 return
3093 f['filepath'] = fname
3094 downloaded.append(fname)
3095 partial_success, real_download = self.dl(fname, new_info)
3096 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3097 success = success and partial_success
3098
3099 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3100 info_dict['__postprocessors'].append(merger)
3101 info_dict['__files_to_merge'] = downloaded
3102 # Even if no files needed downloading, the merge itself happens only now
3103 info_dict['__real_download'] = True
3104 else:
3105 for file in downloaded:
3106 files_to_move[file] = None
3107 else:
3108 # Just a single file
3109 dl_filename = existing_video_file(full_filename, temp_filename)
3110 if dl_filename is None or dl_filename == temp_filename:
3111 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3112 # So we should try to resume the download
3113 success, real_download = self.dl(temp_filename, info_dict)
3114 info_dict['__real_download'] = real_download
3115 else:
3116 self.report_file_already_downloaded(dl_filename)
3117
3118 dl_filename = dl_filename or temp_filename
3119 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3120
3121 except network_exceptions as err:
3122 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3123 return
3124 except OSError as err:
3125 raise UnavailableVideoError(err)
3126 except (ContentTooShortError, ) as err:
3127 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3128 return
3129
3130 self._raise_pending_errors(info_dict)
3131 if success and full_filename != '-':
3132
3133 def fixup():
3134 do_fixup = True
3135 fixup_policy = self.params.get('fixup')
3136 vid = info_dict['id']
3137
3138 if fixup_policy in ('ignore', 'never'):
3139 return
3140 elif fixup_policy == 'warn':
3141 do_fixup = 'warn'
3142 elif fixup_policy != 'force':
3143 assert fixup_policy in ('detect_or_warn', None)
3144 if not info_dict.get('__real_download'):
3145 do_fixup = False
3146
3147 def ffmpeg_fixup(cndn, msg, cls):
3148 if not (do_fixup and cndn):
3149 return
3150 elif do_fixup == 'warn':
3151 self.report_warning(f'{vid}: {msg}')
3152 return
3153 pp = cls(self)
3154 if pp.available:
3155 info_dict['__postprocessors'].append(pp)
3156 else:
3157 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3158
3159 stretched_ratio = info_dict.get('stretched_ratio')
3160 ffmpeg_fixup(
3161 stretched_ratio not in (1, None),
3162 f'Non-uniform pixel ratio {stretched_ratio}',
3163 FFmpegFixupStretchedPP)
3164
3165 ffmpeg_fixup(
3166 (info_dict.get('requested_formats') is None
3167 and info_dict.get('container') == 'm4a_dash'
3168 and info_dict.get('ext') == 'm4a'),
3169 'writing DASH m4a. Only some players support this container',
3170 FFmpegFixupM4aPP)
3171
3172 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3173 downloader = downloader.FD_NAME if downloader else None
3174
3175 if info_dict.get('requested_formats') is None: # Not necessary if doing merger
3176 ffmpeg_fixup((downloader == 'hlsnative' and not self.params.get('hls_use_mpegts'))
3177 or (info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None),
3178 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3179 FFmpegFixupM3u8PP)
3180 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3181 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3182
3183 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3184 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3185
3186 fixup()
3187 try:
3188 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3189 except PostProcessingError as err:
3190 self.report_error('Postprocessing: %s' % str(err))
3191 return
3192 try:
3193 for ph in self._post_hooks:
3194 ph(info_dict['filepath'])
3195 except Exception as err:
3196 self.report_error('post hooks: %s' % str(err))
3197 return
3198 info_dict['__write_download_archive'] = True
3199
3200 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
3201 if self.params.get('force_write_download_archive'):
3202 info_dict['__write_download_archive'] = True
3203 check_max_downloads()
3204
3205 def __download_wrapper(self, func):
3206 @functools.wraps(func)
3207 def wrapper(*args, **kwargs):
3208 try:
3209 res = func(*args, **kwargs)
3210 except UnavailableVideoError as e:
3211 self.report_error(e)
3212 except DownloadCancelled as e:
3213 self.to_screen(f'[info] {e}')
3214 if not self.params.get('break_per_url'):
3215 raise
3216 else:
3217 if self.params.get('dump_single_json', False):
3218 self.post_extract(res)
3219 self.to_stdout(json.dumps(self.sanitize_info(res)))
3220 return wrapper
3221
3222 def download(self, url_list):
3223 """Download a given list of URLs."""
3224 url_list = variadic(url_list) # Passing a single URL is a common mistake
3225 outtmpl = self.params['outtmpl']['default']
3226 if (len(url_list) > 1
3227 and outtmpl != '-'
3228 and '%' not in outtmpl
3229 and self.params.get('max_downloads') != 1):
3230 raise SameFileError(outtmpl)
3231
3232 for url in url_list:
3233 self.__download_wrapper(self.extract_info)(
3234 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3235
3236 return self._download_retcode
3237
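# Illustrative usage (hypothetical URL and options):
#   with YoutubeDL({'outtmpl': '%(title)s.%(ext)s'}) as ydl:
#       retcode = ydl.download(['https://example.com/video'])
# A single URL string is also accepted, since the list is variadic()-wrapped.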
3238 def download_with_info_file(self, info_filename):
3239 with contextlib.closing(fileinput.FileInput(
3240 [info_filename], mode='r',
3241 openhook=fileinput.hook_encoded('utf-8'))) as f:
3242 # FileInput doesn't have a read method, so we can't call json.load
3243 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3244 try:
3245 self.__download_wrapper(self.process_ie_result)(info, download=True)
3246 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3247 if not isinstance(e, EntryNotInPlaylist):
3248 self.to_stderr('\r')
3249 webpage_url = info.get('webpage_url')
3250 if webpage_url is not None:
3251 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3252 return self.download([webpage_url])
3253 else:
3254 raise
3255 return self._download_retcode
3256
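# For illustration (hypothetical filename): re-run a download from a
# previously saved info JSON, e.g. one written by --write-info-json:
#   retcode = ydl.download_with_info_file('NA.info.json')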
3257 @staticmethod
3258 def sanitize_info(info_dict, remove_private_keys=False):
3259 ''' Sanitize the infodict for converting to JSON '''
3260 if info_dict is None:
3261 return info_dict
3262 info_dict.setdefault('epoch', int(time.time()))
3263 info_dict.setdefault('_type', 'video')
3264
3265 if remove_private_keys:
3266 reject = lambda k, v: v is None or k.startswith('__') or k in {
3267 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3268 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
3269 }
3270 else:
3271 reject = lambda k, v: False
3272
3273 def filter_fn(obj):
3274 if isinstance(obj, dict):
3275 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3276 elif isinstance(obj, (list, tuple, set, LazyList)):
3277 return list(map(filter_fn, obj))
3278 elif obj is None or isinstance(obj, (str, int, float, bool)):
3279 return obj
3280 else:
3281 return repr(obj)
3282
3283 return filter_fn(info_dict)
3284
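# For illustration (hypothetical dict): with remove_private_keys=True,
# dunder and bookkeeping fields are dropped and non-serializable values
# are repr()'d:
#   YoutubeDL.sanitize_info({'id': 'x', 'filepath': 'x.mp4',
#                            '__real_download': True}, True)
#   # -> {'id': 'x', 'epoch': ..., '_type': 'video'}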
3285 @staticmethod
3286 def filter_requested_info(info_dict, actually_filter=True):
3287 ''' Alias of sanitize_info for backward compatibility '''
3288 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3289
3290 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3291 for filename in set(filter(None, files_to_delete)):
3292 if msg:
3293 self.to_screen(msg % filename)
3294 try:
3295 os.remove(filename)
3296 except OSError:
3297 self.report_warning(f'Unable to delete file {filename}')
3298 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3299 del info['__files_to_move'][filename]
3300
3301 @staticmethod
3302 def post_extract(info_dict):
3303 def actual_post_extract(info_dict):
3304 if info_dict.get('_type') in ('playlist', 'multi_video'):
3305 for video_dict in info_dict.get('entries', {}):
3306 actual_post_extract(video_dict or {})
3307 return
3308
3309 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3310 info_dict.update(post_extractor())
3311
3312 actual_post_extract(info_dict or {})
3313
3314 def run_pp(self, pp, infodict):
3315 files_to_delete = []
3316 if '__files_to_move' not in infodict:
3317 infodict['__files_to_move'] = {}
3318 try:
3319 files_to_delete, infodict = pp.run(infodict)
3320 except PostProcessingError as e:
3321 # Must be True and not 'only_download'
3322 if self.params.get('ignoreerrors') is True:
3323 self.report_error(e)
3324 return infodict
3325 raise
3326
3327 if not files_to_delete:
3328 return infodict
3329 if self.params.get('keepvideo', False):
3330 for f in files_to_delete:
3331 infodict['__files_to_move'].setdefault(f, '')
3332 else:
3333 self._delete_downloaded_files(
3334 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3335 return infodict
3336
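# For illustration: a minimal custom postprocessor sketch, registered through
# the pre-existing add_post_processor(); run() must return (files_to_delete, info):
#   class MyPP(PostProcessor):  # hypothetical
#       def run(self, info):
#           return [], info
#   ydl.add_post_processor(MyPP(ydl), when='post_process')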
3337 def run_all_pps(self, key, info, *, additional_pps=None):
3338 self._forceprint(key, info)
3339 for pp in (additional_pps or []) + self._pps[key]:
3340 info = self.run_pp(pp, info)
3341 return info
3342
3343 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3344 info = dict(ie_info)
3345 info['__files_to_move'] = files_to_move or {}
3346 try:
3347 info = self.run_all_pps(key, info)
3348 except PostProcessingError as err:
3349 msg = f'Preprocessing: {err}'
3350 info.setdefault('__pending_error', msg)
3351 self.report_error(msg, is_error=False)
3352 return info, info.pop('__files_to_move', None)
3353
3354 def post_process(self, filename, info, files_to_move=None):
3355 """Run all the postprocessors on the given file."""
3356 info['filepath'] = filename
3357 info['__files_to_move'] = files_to_move or {}
3358 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3359 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3360 del info['__files_to_move']
3361 return self.run_all_pps('after_move', info)
3362
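# For illustration, the overall postprocessing lifecycle for a single video is
# roughly: pre_process -> before_dl -> (download) -> post_process
# -> MoveFilesAfterDownloadPP -> after_move.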
3363 def _make_archive_id(self, info_dict):
3364 video_id = info_dict.get('id')
3365 if not video_id:
3366 return
3367 # Future-proof against any change in case
3368 # and backwards compatibility with prior versions
3369 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3370 if extractor is None:
3371 url = str_or_none(info_dict.get('url'))
3372 if not url:
3373 return
3374 # Try to find matching extractor for the URL and take its ie_key
3375 for ie_key, ie in self._ies.items():
3376 if ie.suitable(url):
3377 extractor = ie_key
3378 break
3379 else:
3380 return
3381 return f'{extractor.lower()} {video_id}'
3382
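# For illustration (hypothetical values): archive entries have the form
# '<extractor_key in lowercase> <video id>', e.g.
#   self._make_archive_id({'extractor_key': 'Youtube', 'id': 'abc123'})
#   # -> 'youtube abc123'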
3383 def in_download_archive(self, info_dict):
3384 fn = self.params.get('download_archive')
3385 if fn is None:
3386 return False
3387
3388 vid_id = self._make_archive_id(info_dict)
3389 if not vid_id:
3390 return False # Incomplete video information
3391
3392 return vid_id in self.archive
3393
3394 def record_download_archive(self, info_dict):
3395 fn = self.params.get('download_archive')
3396 if fn is None:
3397 return
3398 vid_id = self._make_archive_id(info_dict)
3399 assert vid_id
3400 self.write_debug(f'Adding to archive: {vid_id}')
3401 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3402 archive_file.write(vid_id + '\n')
3403 self.archive.add(vid_id)
3404
3405 @staticmethod
3406 def format_resolution(format, default='unknown'):
3407 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3408 return 'audio only'
3409 if format.get('resolution') is not None:
3410 return format['resolution']
3411 if format.get('width') and format.get('height'):
3412 return '%dx%d' % (format['width'], format['height'])
3413 elif format.get('height'):
3414 return '%sp' % format['height']
3415 elif format.get('width'):
3416 return '%dx?' % format['width']
3417 return default
3418
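# For illustration (hypothetical format dicts):
#   {'width': 1920, 'height': 1080}      -> '1920x1080'
#   {'height': 720}                      -> '720p'
#   {'vcodec': 'none', 'acodec': 'mp4a'} -> 'audio only'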
3419 def _list_format_headers(self, *headers):
3420 if self.params.get('listformats_table', True) is not False:
3421 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3422 return headers
3423
3424 def _format_note(self, fdict):
3425 res = ''
3426 if fdict.get('ext') in ['f4f', 'f4m']:
3427 res += '(unsupported)'
3428 if fdict.get('language'):
3429 if res:
3430 res += ' '
3431 res += '[%s]' % fdict['language']
3432 if fdict.get('format_note') is not None:
3433 if res:
3434 res += ' '
3435 res += fdict['format_note']
3436 if fdict.get('tbr') is not None:
3437 if res:
3438 res += ', '
3439 res += '%4dk' % fdict['tbr']
3440 if fdict.get('container') is not None:
3441 if res:
3442 res += ', '
3443 res += '%s container' % fdict['container']
3444 if (fdict.get('vcodec') is not None
3445 and fdict.get('vcodec') != 'none'):
3446 if res:
3447 res += ', '
3448 res += fdict['vcodec']
3449 if fdict.get('vbr') is not None:
3450 res += '@'
3451 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3452 res += 'video@'
3453 if fdict.get('vbr') is not None:
3454 res += '%4dk' % fdict['vbr']
3455 if fdict.get('fps') is not None:
3456 if res:
3457 res += ', '
3458 res += '%sfps' % fdict['fps']
3459 if fdict.get('acodec') is not None:
3460 if res:
3461 res += ', '
3462 if fdict['acodec'] == 'none':
3463 res += 'video only'
3464 else:
3465 res += '%-5s' % fdict['acodec']
3466 elif fdict.get('abr') is not None:
3467 if res:
3468 res += ', '
3469 res += 'audio'
3470 if fdict.get('abr') is not None:
3471 res += '@%3dk' % fdict['abr']
3472 if fdict.get('asr') is not None:
3473 res += ' (%5dHz)' % fdict['asr']
3474 if fdict.get('filesize') is not None:
3475 if res:
3476 res += ', '
3477 res += format_bytes(fdict['filesize'])
3478 elif fdict.get('filesize_approx') is not None:
3479 if res:
3480 res += ', '
3481 res += '~' + format_bytes(fdict['filesize_approx'])
3482 return res
3483
3484 def render_formats_table(self, info_dict):
3485 if not info_dict.get('formats') and not info_dict.get('url'):
3486 return None
3487
3488 formats = info_dict.get('formats', [info_dict])
3489 if self.params.get('listformats_table', True) is False:
3490 table = [
3491 [
3492 format_field(f, 'format_id'),
3493 format_field(f, 'ext'),
3494 self.format_resolution(f),
3495 self._format_note(f)
3496 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3497 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3498
3499 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3500 table = [
3501 [
3502 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3503 format_field(f, 'ext'),
3504 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3505 format_field(f, 'fps', '\t%d'),
3506 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3507 delim,
3508 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3509 format_field(f, 'tbr', '\t%dk'),
3510 shorten_protocol_name(f.get('protocol', '')),
3511 delim,
3512 format_field(f, 'vcodec', default='unknown').replace(
3513 'none', 'images' if f.get('acodec') == 'none'
3514 else self._format_out('audio only', self.Styles.SUPPRESS)),
3515 format_field(f, 'vbr', '\t%dk'),
3516 format_field(f, 'acodec', default='unknown').replace(
3517 'none', '' if f.get('vcodec') == 'none'
3518 else self._format_out('video only', self.Styles.SUPPRESS)),
3519 format_field(f, 'abr', '\t%dk'),
3520 format_field(f, 'asr', '\t%dHz'),
3521 join_nonempty(
3522 self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3523 format_field(f, 'language', '[%s]'),
3524 join_nonempty(format_field(f, 'format_note'),
3525 format_field(f, 'container', ignore=(None, f.get('ext'))),
3526 delim=', '),
3527 delim=' '),
3528 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3529 header_line = self._list_format_headers(
3530 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3531 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3532
3533 return render_table(
3534 header_line, table, hide_empty=True,
3535 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3536
3537 def render_thumbnails_table(self, info_dict):
3538 thumbnails = list(info_dict.get('thumbnails') or [])
3539 if not thumbnails:
3540 return None
3541 return render_table(
3542 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3543 [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3544
3545 def render_subtitles_table(self, video_id, subtitles):
3546 def _row(lang, formats):
3547 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3548 if len(set(names)) == 1:
3549 names = [] if names[0] == 'unknown' else names[:1]
3550 return [lang, ', '.join(names), ', '.join(exts)]
3551
3552 if not subtitles:
3553 return None
3554 return render_table(
3555 self._list_format_headers('Language', 'Name', 'Formats'),
3556 [_row(lang, formats) for lang, formats in subtitles.items()],
3557 hide_empty=True)
3558
3559 def __list_table(self, video_id, name, func, *args):
3560 table = func(*args)
3561 if not table:
3562 self.to_screen(f'{video_id} has no {name}')
3563 return
3564 self.to_screen(f'[info] Available {name} for {video_id}:')
3565 self.to_stdout(table)
3566
3567 def list_formats(self, info_dict):
3568 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3569
3570 def list_thumbnails(self, info_dict):
3571 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3572
3573 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3574 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3575
3576 def urlopen(self, req):
3577 """ Start an HTTP download """
3578 if isinstance(req, str):
3579 req = sanitized_Request(req)
3580 return self._opener.open(req, timeout=self._socket_timeout)
3581
3582 def print_debug_header(self):
3583 if not self.params.get('verbose'):
3584 return
3585
3586 # These imports can be slow. So import them only as needed
3587 from .extractor.extractors import _LAZY_LOADER
3588 from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
3589
3590 def get_encoding(stream):
3591 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3592 if not supports_terminal_sequences(stream):
3593 from .utils import WINDOWS_VT_MODE # Must be imported locally
3594 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3595 return ret
3596
3597 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
3598 locale.getpreferredencoding(),
3599 sys.getfilesystemencoding(),
3600 self.get_encoding(),
3601 ', '.join(
3602 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3603 if stream is not None and key != 'console')
3604 )
3605
3606 logger = self.params.get('logger')
3607 if logger:
3608 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3609 write_debug(encoding_str)
3610 else:
3611 write_string(f'[debug] {encoding_str}\n', encoding=None)
3612 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3613
3614 source = detect_variant()
3615 write_debug(join_nonempty(
3616 'yt-dlp version', __version__,
3617 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3618 '' if source == 'unknown' else f'({source})',
3619 delim=' '))
3620 if not _LAZY_LOADER:
3621 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3622 write_debug('Lazy loading extractors is forcibly disabled')
3623 else:
3624 write_debug('Lazy loading extractors is disabled')
3625 if plugin_extractors or plugin_postprocessors:
3626 write_debug('Plugins: %s' % [
3627 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3628 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3629 if self.params['compat_opts']:
3630 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
3631
3632 if source == 'source':
3633 try:
3634 stdout, _, _ = Popen.run(
3635 ['git', 'rev-parse', '--short', 'HEAD'],
3636 text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
3637 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
3638 if re.fullmatch('[0-9a-f]+', stdout.strip()):
3639 write_debug(f'Git HEAD: {stdout.strip()}')
3640 except Exception:
3641 with contextlib.suppress(Exception):
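# sys.exc_clear() exists only on Python 2; on Python 3 the
# NameError it raises is swallowed by this suppress()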
3642 sys.exc_clear()
3643
3644 def python_implementation():
3645 impl_name = platform.python_implementation()
3646 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3647 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3648 return impl_name
3649
3650 write_debug('Python version %s (%s %s) - %s' % (
3651 platform.python_version(),
3652 python_implementation(),
3653 platform.architecture()[0],
3654 platform_name()))
3655
3656 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3657 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3658 if ffmpeg_features:
3659 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3660
3661 exe_versions['rtmpdump'] = rtmpdump_version()
3662 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3663 exe_str = ', '.join(
3664 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3665 ) or 'none'
3666 write_debug('exe versions: %s' % exe_str)
3667
3668 from .compat.compat_utils import get_package_info
3669 from .dependencies import available_dependencies
3670
3671 write_debug('Optional libraries: %s' % (', '.join(sorted({
3672 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
3673 })) or 'none'))
3674
3675 self._setup_opener()
3676 proxy_map = {}
3677 for handler in self._opener.handlers:
3678 if hasattr(handler, 'proxies'):
3679 proxy_map.update(handler.proxies)
3680 write_debug(f'Proxy map: {proxy_map}')
3681
3682 # Not implemented
3683 if False and self.params.get('call_home'):
3684 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
3685 write_debug('Public IP address: %s' % ipaddr)
3686 latest_version = self.urlopen(
3687 'https://yt-dl.org/latest/version').read().decode()
3688 if version_tuple(latest_version) > version_tuple(__version__):
3689 self.report_warning(
3690 'You are using an outdated version (newest version: %s)! '
3691 'See https://yt-dl.org/update if you need help updating.' %
3692 latest_version)
3693
3694 def _setup_opener(self):
3695 if hasattr(self, '_opener'):
3696 return
3697 timeout_val = self.params.get('socket_timeout')
3698 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3699
3700 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3701 opts_cookiefile = self.params.get('cookiefile')
3702 opts_proxy = self.params.get('proxy')
3703
3704 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3705
3706 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3707 if opts_proxy is not None:
3708 if opts_proxy == '':
3709 proxies = {}
3710 else:
3711 proxies = {'http': opts_proxy, 'https': opts_proxy}
3712 else:
3713 proxies = compat_urllib_request.getproxies()
3714 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3715 if 'http' in proxies and 'https' not in proxies:
3716 proxies['https'] = proxies['http']
3717 proxy_handler = PerRequestProxyHandler(proxies)
3718
3719 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3720 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3721 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3722 redirect_handler = YoutubeDLRedirectHandler()
3723 data_handler = urllib.request.DataHandler()
3724
3725 # When passing our own FileHandler instance, build_opener won't add the
3726 # default FileHandler and allows us to disable the file protocol, which
3727 # can be used for malicious purposes (see
3728 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3729 file_handler = compat_urllib_request.FileHandler()
3730
3731 def file_open(*args, **kwargs):
3732 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3733 file_handler.file_open = file_open
3734
3735 opener = compat_urllib_request.build_opener(
3736 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3737
3738 # Delete the default user-agent header, which would otherwise apply in
3739 # cases where our custom HTTP handler doesn't come into play
3740 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3741 opener.addheaders = []
3742 self._opener = opener
3743
3744 def encode(self, s):
3745 if isinstance(s, bytes):
3746 return s # Already encoded
3747
3748 try:
3749 return s.encode(self.get_encoding())
3750 except UnicodeEncodeError as err:
3751 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3752 raise
3753
3754 def get_encoding(self):
3755 encoding = self.params.get('encoding')
3756 if encoding is None:
3757 encoding = preferredencoding()
3758 return encoding
3759
3760 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3761 ''' Write infojson and return True = written, 'exists' = already exists, False = skip, None = error '''
3762 if overwrite is None:
3763 overwrite = self.params.get('overwrites', True)
3764 if not self.params.get('writeinfojson'):
3765 return False
3766 elif not infofn:
3767 self.write_debug(f'Skipping writing {label} infojson')
3768 return False
3769 elif not self._ensure_dir_exists(infofn):
3770 return None
3771 elif not overwrite and os.path.exists(infofn):
3772 self.to_screen(f'[info] {label.title()} metadata is already present')
3773 return 'exists'
3774
3775 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3776 try:
3777 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3778 return True
3779 except OSError:
3780 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3781 return None
3782
3783 def _write_description(self, label, ie_result, descfn):
3784 ''' Write description and return True = written, False = skip, None = error '''
3785 if not self.params.get('writedescription'):
3786 return False
3787 elif not descfn:
3788 self.write_debug(f'Skipping writing {label} description')
3789 return False
3790 elif not self._ensure_dir_exists(descfn):
3791 return None
3792 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3793 self.to_screen(f'[info] {label.title()} description is already present')
3794 elif ie_result.get('description') is None:
3795 self.report_warning(f'There\'s no {label} description to write')
3796 return False
3797 else:
3798 try:
3799 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3800 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3801 descfile.write(ie_result['description'])
3802 except OSError:
3803 self.report_error(f'Cannot write {label} description file {descfn}')
3804 return None
3805 return True
3806
3807 def _write_subtitles(self, info_dict, filename):
3808 ''' Write subtitles to file and return a list of (sub_filename, final_sub_filename); or None on error '''
3809 ret = []
3810 subtitles = info_dict.get('requested_subtitles')
3811 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3812 # Subtitle download errors are already handled in the relevant IE,
3813 # so this silently continues when used with an IE lacking subtitle support
3814 return ret
3815
3816 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3817 if not sub_filename_base:
3818 self.to_screen('[info] Skipping writing video subtitles')
3819 return ret
3820 for sub_lang, sub_info in subtitles.items():
3821 sub_format = sub_info['ext']
3822 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3823 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3824 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3825 if existing_sub:
3826 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3827 sub_info['filepath'] = existing_sub
3828 ret.append((existing_sub, sub_filename_final))
3829 continue
3830
3831 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3832 if sub_info.get('data') is not None:
3833 try:
3834 # Use newline='' to prevent conversion of newline characters
3835 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3836 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3837 subfile.write(sub_info['data'])
3838 sub_info['filepath'] = sub_filename
3839 ret.append((sub_filename, sub_filename_final))
3840 continue
3841 except OSError:
3842 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3843 return None
3844
3845 try:
3846 sub_copy = sub_info.copy()
3847 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3848 self.dl(sub_filename, sub_copy, subtitle=True)
3849 sub_info['filepath'] = sub_filename
3850 ret.append((sub_filename, sub_filename_final))
3851 except (DownloadError, ExtractorError, OSError, ValueError) + network_exceptions as err:
3852 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
3853 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
3854 if not self.params.get('ignoreerrors'):
3855 self.report_error(msg)
3856 raise DownloadError(msg)
3857 self.report_warning(msg)
3858 return ret
3859
3860 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3861 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3862 write_all = self.params.get('write_all_thumbnails', False)
3863 thumbnails, ret = [], []
3864 if write_all or self.params.get('writethumbnail', False):
3865 thumbnails = info_dict.get('thumbnails') or []
3866 multiple = write_all and len(thumbnails) > 1
3867
3868 if thumb_filename_base is None:
3869 thumb_filename_base = filename
3870 if thumbnails and not thumb_filename_base:
3871 self.write_debug(f'Skipping writing {label} thumbnail')
3872 return ret
3873
3874 for idx, t in list(enumerate(thumbnails))[::-1]:
3875 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3876 thumb_display_id = f'{label} thumbnail {t["id"]}'
3877 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3878 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3879
3880 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3881 if existing_thumb:
3882 self.to_screen('[info] %s is already present' % (
3883 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3884 t['filepath'] = existing_thumb
3885 ret.append((existing_thumb, thumb_filename_final))
3886 else:
3887 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3888 try:
3889 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3890 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3891 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3892 shutil.copyfileobj(uf, thumbf)
3893 ret.append((thumb_filename, thumb_filename_final))
3894 t['filepath'] = thumb_filename
3895 except network_exceptions as err:
3896 thumbnails.pop(idx)
3897 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3898 if ret and not write_all:
3899 break
3900 return ret