#!/usr/bin/env python3
import collections
import contextlib
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import platform
import random
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata
import urllib.request
from string import ascii_letters

from .cache import Cache
from .compat import (
    HAS_LEGACY as compat_has_legacy,
    compat_get_terminal_size,
    compat_os_name,
    compat_shlex_quote,
    compat_str,
    compat_urllib_error,
    compat_urllib_request,
)
from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .update import detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    LINK_TEMPLATES,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    GeoRestrictedError,
    HEADRequest,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PerRequestProxyHandler,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    age_restricted,
    args_to_str,
    date_from_str,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    expand_path,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_domain,
    int_or_none,
    iri_to_uri,
    join_nonempty,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    merge_headers,
    network_exceptions,
    number_of_digits,
    orderedSet,
    parse_filesize,
    platform_name,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .version import RELEASE_GIT_HEAD, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are responsible for downloading the actual video file
    and writing it to disk if the user has requested it, among some other
    tasks. In most cases there should be one per program. Since, given a
    video URL, the downloader doesn't know how to extract all the needed
    information (a task that InfoExtractors do), it has to pass the URL to
    one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. see "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. see "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of same quality.
    allow_multiple_video_streams:  Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams:  Allow multiple audio streams to be merged
                       into a single file
    check_formats      Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home'
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails:  Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages. Eg: ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser:  A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, and the name of the
                       keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate:  Do not verify SSL certificates
    client_certificate:  Path to client certificate file in PEM format. May include the private key
    client_certificate_key:  Path to private key file for client certificate
    client_certificate_password:  Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy:  URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                               the entries of utils.POSTPROCESS_WHEN
                               Assumed to be 'post_process' if not given
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    postprocessor_hooks:  A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                                 Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: Extension to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval:Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be
                       used for explicit geographic restriction bypassing via
                       faking X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that takes the number of attempts
                       as argument and returns the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
    download_ranges:   A function that gets called for every video with the signature
                       (info_dict, *, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded. Each Section contains:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       Eg: {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    """
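
    # A minimal usage sketch (illustrative comment, not part of the original
    # module). Most of the options documented above are plain entries in the
    # params dict; the URL and option values below are placeholders.
    #
    #   from yt_dlp import YoutubeDL
    #
    #   def hook(d):
    #       # `d` is the progress dict described under "progress_hooks"
    #       if d['status'] == 'finished':
    #           print('Downloaded', d['filename'])
    #
    #   params = {
    #       'format': 'bestvideo*+bestaudio/best',
    #       'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'},
    #       'progress_hooks': [hook],
    #       'postprocessors': [{'key': 'FFmpegMetadata'}],
    #   }
    #   with YoutubeDL(params) as ydl:
    #       ydl.download(['https://example.com/watch?v=placeholder'])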

    _NUMERIC_FIELDS = {
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _format_selection_exts = {
        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
        'video': {'mp4', 'flv', 'webm', '3gp'},
        'storyboards': {'mhtml'},
    }

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        windows_enable_vt_mode()
        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )
        self._allow_colors = Namespace(**{
            type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
            for type_, stream in self._out_files.items_ if type_ != 'console'
        })

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecation_warning(msg)

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        if not compat_has_legacy:
            self.params['compat_opts'].add('no-compat-legacy')
        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']
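
        # Net effect of the block above: 'overwrites' and 'nooverwrites' are
        # kept as boolean negations of each other whenever either one is set,
        # and both stay unset when neither was given (the "overwrite only
        # non-video files" default described in the docstring).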

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        # Set http_headers defaults according to std_headers
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        self._setup_opener()
        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))
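
        # The archive is a plain text file with one entry per line, pairing an
        # extractor name with a video ID (e.g. a line like
        # "youtube dQw4w9WgXcQ"). Entries are stripped and collected into the
        # self.archive set above, making later --download-archive lookups O(1).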

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def _get_info_extractor_class(self, ie_key):
        ie = self._ies.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)
            self.add_info_extractor(ie)
        return ie

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key; it will try to get one from
        the _ies list, and if there is no instance, it will create a new one
        and add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)
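
    # A sketch of a download progress hook (illustrative): hooks receive the
    # status dictionary described in the class docstring and should check
    # `status` first, ignoring unknown values.
    #
    #   def report_progress(d):
    #       if d['status'] == 'downloading':
    #           print(d.get('downloaded_bytes'), 'of', d.get('total_bytes'))
    #       elif d['status'] == 'finished':
    #           print('Done:', d['filename'])
    #
    #   ydl.add_progress_hook(report_progress)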

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)
    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'DeprecationWarning: {message}')
        else:
            self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = lambda x: x
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))
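
    # Illustrative example: with params['paths'] set to
    # {'home': '~/Videos', 'thumbnail': 'thumbs'}, calling
    # get_output_path('thumbnail', 'clip.webp') expands each component and
    # joins them, yielding roughly "<expanded ~>/Videos/thumbs/clip.webp".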

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)
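
    # Illustrative example (an assumption about the regex above, not an
    # official API): a stray '%' is doubled while real template keys survive,
    # e.g. escape_outtmpl('100% %(title)s') -> '100%% %(title)s', so the later
    # "% info_dict" substitution does not choke on the lone '%'.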

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err
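
    # Illustrative usage: validate a template before handing it to the
    # downloader; the classmethod returns None when the template is
    # well-formed and the ValueError otherwise.
    #
    #   assert YoutubeDL.validate_outtmpl('%(title)s - %(id)s.%(ext)s') is None
    #   assert isinstance(YoutubeDL.validate_outtmpl('%(title'), ValueError)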

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')
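
        # The grammar above covers, for example (illustrative):
        #   %(title)s                plain field traversal
        #   %(duration>%H-%M-%S)s    strftime-style formatting via '>'
        #   %(playlist_index+10)d    arithmetic via the '+'/'-' operators
        #   %(title,id)s             ',' lists alternate fields to try in order
        #   %(id&yes|no)s            '&' replacement if set, '|' default if not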

        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = f'0{field_size_compat_map[key]:d}d'

            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict
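
    # Illustrative call: render a template against a (minimal) info dict in
    # one step.
    #
    #   name = ydl.evaluate_outtmpl('%(title)s.%(ext)s',
    #                               {'title': 'demo', 'ext': 'mp4'})
    #   # -> 'demo.mp4'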

    def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
        assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
        if outtmpl is None:
            outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
        try:
            outtmpl = self._outtmpl_expandpath(outtmpl)
            filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
            if not filename:
                return None

            if tmpl_type in ('', 'temp'):
                final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
                if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                    filename = replace_extension(filename, ext, final_ext)
            elif tmpl_type:
                force_ext = OUTTMPL_TYPES[tmpl_type]
                if force_ext:
                    filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                no_ext, *ext = filename.rsplit('.', 2)
                filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
        """Generate the output filename"""
        if outtmpl:
            assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
            dir_type = None
        filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)

    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is NO_DEFAULT:
                    while True:
                        filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                        reply = input(self._format_screen(
                            f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                        if reply in {'y', ''}:
                            return None
                        elif reply == 'n':
                            return f'Skipping {video_title}'
                elif ret is not None:
                    return ret
            return None

        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason
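
    # A sketch of a match_filter callable (illustrative), matching the
    # contract described in the class docstring: return None to accept,
    # a message to skip, or utils.NO_DEFAULT to ask interactively.
    #
    #   def longer_than_a_minute(info_dict, *, incomplete=False):
    #       duration = info_dict.get('duration')
    #       if duration is not None and duration < 60:
    #           return 'Skipping short video'
    #       return None
    #
    #   ydl = YoutubeDL({'match_filter': longer_than_a_minute})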
1363
1364 @staticmethod
1365 def add_extra_info(info_dict, extra_info):
1366 '''Set the keys from extra_info in info dict if they are missing'''
1367 for key, value in extra_info.items():
1368 info_dict.setdefault(key, value)
1369
1370 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1371 process=True, force_generic_extractor=False):
1372 """
1373 Return a list with a dictionary for each video extracted.
1374
1375 Arguments:
1376 url -- URL to extract
1377
1378 Keyword arguments:
1379 download -- whether to download videos during extraction
1380 ie_key -- extractor key hint
1381 extra_info -- dictionary containing the extra values to add to each result
1382 process -- whether to resolve all unresolved references (URLs, playlist items),
1383 must be True for download to work.
1384 force_generic_extractor -- force using the generic extractor
1385 """
1386
1387 if extra_info is None:
1388 extra_info = {}
1389
1390 if not ie_key and force_generic_extractor:
1391 ie_key = 'Generic'
1392
1393 if ie_key:
1394 ies = {ie_key: self._get_info_extractor_class(ie_key)}
1395 else:
1396 ies = self._ies
1397
1398 for ie_key, ie in ies.items():
1399 if not ie.suitable(url):
1400 continue
1401
1402 if not ie.working():
1403 self.report_warning('The program functionality for this site has been marked as broken, '
1404 'and will probably not work.')
1405
1406 temp_id = ie.get_temp_id(url)
1407 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1408 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1409 if self.params.get('break_on_existing', False):
1410 raise ExistingVideoReached()
1411 break
1412 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1413 else:
1414 self.report_error('no suitable InfoExtractor for URL %s' % url)
1415
1416 def _handle_extraction_exceptions(func):
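# Decorator that turns extraction failures into user-facing reports:
# ReExtractInfo retries the wrapped call, cancellations propagate,
# expected extractor errors are reported, and unexpected exceptions
# re-raise unless ignoreerrors is set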
1417 @functools.wraps(func)
1418 def wrapper(self, *args, **kwargs):
1419 while True:
1420 try:
1421 return func(self, *args, **kwargs)
1422 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1423 raise
1424 except ReExtractInfo as e:
1425 if e.expected:
1426 self.to_screen(f'{e}; Re-extracting data')
1427 else:
1428 self.to_stderr('\r')
1429 self.report_warning(f'{e}; Re-extracting data')
1430 continue
1431 except GeoRestrictedError as e:
1432 msg = e.msg
1433 if e.countries:
1434 msg += '\nThis video is available in %s.' % ', '.join(
1435 map(ISO3166Utils.short2full, e.countries))
1436 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1437 self.report_error(msg)
1438 except ExtractorError as e: # An error we somewhat expected
1439 self.report_error(str(e), e.format_traceback())
1440 except Exception as e:
1441 if self.params.get('ignoreerrors'):
1442 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1443 else:
1444 raise
1445 break
1446 return wrapper
1447
1448 def _wait_for_video(self, ie_result):
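# Implements --wait-for-video: sleep until the expected release time,
# then raise ReExtractInfo so the video is extracted again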
1449 if (not self.params.get('wait_for_video')
1450 or ie_result.get('_type', 'video') != 'video'
1451 or ie_result.get('formats') or ie_result.get('url')):
1452 return
1453
1454 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1455 last_msg = ''
1456
1457 def progress(msg):
1458 nonlocal last_msg
1459 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1460 last_msg = msg
1461
1462 min_wait, max_wait = self.params.get('wait_for_video')
1463 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1464 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1465 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1466 self.report_warning('Release time of video is not known')
1467 elif (diff or 0) <= 0:
1468 self.report_warning('Video should already be available according to extracted info')
1469 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1470 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1471
1472 wait_till = time.time() + diff
1473 try:
1474 while True:
1475 diff = wait_till - time.time()
1476 if diff <= 0:
1477 progress('')
1478 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1479 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1480 time.sleep(1)
1481 except KeyboardInterrupt:
1482 progress('')
1483 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1484 except BaseException as e:
1485 if not isinstance(e, ReExtractInfo):
1486 self.to_screen('')
1487 raise
1488
1489 @_handle_extraction_exceptions
1490 def __extract_info(self, url, ie, download, extra_info, process):
1491 ie_result = ie.extract(url)
1492 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1493 return
1494 if isinstance(ie_result, list):
1495 # Backwards compatibility: old IE result format
1496 ie_result = {
1497 '_type': 'compat_list',
1498 'entries': ie_result,
1499 }
1500 if extra_info.get('original_url'):
1501 ie_result.setdefault('original_url', extra_info['original_url'])
1502 self.add_default_extra_info(ie_result, ie, url)
1503 if process:
1504 self._wait_for_video(ie_result)
1505 return self.process_ie_result(ie_result, download, extra_info)
1506 else:
1507 return ie_result
1508
1509 def add_default_extra_info(self, ie_result, ie, url):
1510 if url is not None:
1511 self.add_extra_info(ie_result, {
1512 'webpage_url': url,
1513 'original_url': url,
1514 })
1515 webpage_url = ie_result.get('webpage_url')
1516 if webpage_url:
1517 self.add_extra_info(ie_result, {
1518 'webpage_url_basename': url_basename(webpage_url),
1519 'webpage_url_domain': get_domain(webpage_url),
1520 })
1521 if ie is not None:
1522 self.add_extra_info(ie_result, {
1523 'extractor': ie.IE_NAME,
1524 'extractor_key': ie.ie_key(),
1525 })
1526
1527 def process_ie_result(self, ie_result, download=True, extra_info=None):
1528 """
1529 Take the result of the ie (it may be modified) and resolve all unresolved
1530 references (URLs, playlist items).
1531
1532 It will also download the videos if 'download'.
1533 Returns the resolved ie_result.
1534 """
1535 if extra_info is None:
1536 extra_info = {}
1537 result_type = ie_result.get('_type', 'video')
1538
1539 if result_type in ('url', 'url_transparent'):
1540 ie_result['url'] = sanitize_url(ie_result['url'])
1541 if ie_result.get('original_url'):
1542 extra_info.setdefault('original_url', ie_result['original_url'])
1543
1544 extract_flat = self.params.get('extract_flat', False)
1545 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1546 or extract_flat is True):
1547 info_copy = ie_result.copy()
1548 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1549 if ie and not ie_result.get('id'):
1550 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1551 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1552 self.add_extra_info(info_copy, extra_info)
1553 info_copy, _ = self.pre_process(info_copy)
1554 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1555 self._raise_pending_errors(info_copy)
1556 if self.params.get('force_write_download_archive', False):
1557 self.record_download_archive(info_copy)
1558 return ie_result
1559
1560 if result_type == 'video':
1561 self.add_extra_info(ie_result, extra_info)
1562 ie_result = self.process_video_result(ie_result, download=download)
1563 self._raise_pending_errors(ie_result)
1564 additional_urls = (ie_result or {}).get('additional_urls')
1565 if additional_urls:
1566 # TODO: Improve MetadataParserPP to allow setting a list
1567 if isinstance(additional_urls, compat_str):
1568 additional_urls = [additional_urls]
1569 self.to_screen(
1570 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1571 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1572 ie_result['additional_entries'] = [
1573 self.extract_info(
1574 url, download, extra_info=extra_info,
1575 force_generic_extractor=self.params.get('force_generic_extractor'))
1576 for url in additional_urls
1577 ]
1578 return ie_result
1579 elif result_type == 'url':
1580 # We have to add extra_info to the results because it may be
1581 # contained in a playlist
1582 return self.extract_info(
1583 ie_result['url'], download,
1584 ie_key=ie_result.get('ie_key'),
1585 extra_info=extra_info)
1586 elif result_type == 'url_transparent':
1587 # Use the information from the embedding page
1588 info = self.extract_info(
1589 ie_result['url'], ie_key=ie_result.get('ie_key'),
1590 extra_info=extra_info, download=False, process=False)
1591
1592 # extract_info may return None when ignoreerrors is enabled and
1593 # extraction failed with an error, don't crash and return early
1594 # in this case
1595 if not info:
1596 return info
1597
1598 new_result = info.copy()
1599 new_result.update(filter_dict(ie_result, lambda k, v: (
1600 v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
1601
1602 # Extracted info may not be a video result (i.e.
1603 # info.get('_type', 'video') != 'video') but rather a 'url' or
1604 # 'url_transparent'. In such cases, outer metadata (from ie_result)
1605 # should be propagated to the inner one (info). For this to happen,
1606 # the _type of info should be overridden with 'url_transparent'. This
1607 # fixes the issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1608 if new_result.get('_type') == 'url':
1609 new_result['_type'] = 'url_transparent'
1610
1611 return self.process_ie_result(
1612 new_result, download=download, extra_info=extra_info)
1613 elif result_type in ('playlist', 'multi_video'):
1614 # Protect from infinite recursion due to recursively nested playlists
1615 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1616 webpage_url = ie_result['webpage_url']
1617 if webpage_url in self._playlist_urls:
1618 self.to_screen(
1619 '[download] Skipping already downloaded playlist: %s'
1620 % (ie_result.get('title') or ie_result.get('id')))
1621 return
1622
1623 self._playlist_level += 1
1624 self._playlist_urls.add(webpage_url)
1625 self._fill_common_fields(ie_result, False)
1626 self._sanitize_thumbnails(ie_result)
1627 try:
1628 return self.__process_playlist(ie_result, download)
1629 finally:
1630 self._playlist_level -= 1
1631 if not self._playlist_level:
1632 self._playlist_urls.clear()
1633 elif result_type == 'compat_list':
1634 self.report_warning(
1635 'Extractor %s returned a compat_list result. '
1636 'It needs to be updated.' % ie_result.get('extractor'))
1637
1638 def _fixup(r):
1639 self.add_extra_info(r, {
1640 'extractor': ie_result['extractor'],
1641 'webpage_url': ie_result['webpage_url'],
1642 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1643 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1644 'extractor_key': ie_result['extractor_key'],
1645 })
1646 return r
1647 ie_result['entries'] = [
1648 self.process_ie_result(_fixup(r), download, extra_info)
1649 for r in ie_result['entries']
1650 ]
1651 return ie_result
1652 else:
1653 raise Exception('Invalid result type: %s' % result_type)
1654
1655 def _ensure_dir_exists(self, path):
1656 return make_dir(path, self.report_error)
1657
1658 @staticmethod
1659 def _playlist_infodict(ie_result, **kwargs):
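# Build the info dict representing the playlist itself (used for the pl_* output templates)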
1660 return {
1661 **ie_result,
1662 'playlist': ie_result.get('title') or ie_result.get('id'),
1663 'playlist_id': ie_result.get('id'),
1664 'playlist_title': ie_result.get('title'),
1665 'playlist_uploader': ie_result.get('uploader'),
1666 'playlist_uploader_id': ie_result.get('uploader_id'),
1667 'playlist_index': 0,
1668 **kwargs,
1669 }
1670
1671 def __process_playlist(self, ie_result, download):
1672 """Process each entry in the playlist"""
1673 title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
1674 self.to_screen(f'[download] Downloading playlist: {title}')
1675
1676 all_entries = PlaylistEntries(self, ie_result)
1677 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1678
1679 lazy = self.params.get('lazy_playlist')
1680 if lazy:
1681 resolved_entries, n_entries = [], 'N/A'
1682 ie_result['requested_entries'], ie_result['entries'] = None, None
1683 else:
1684 entries = resolved_entries = list(entries)
1685 n_entries = len(resolved_entries)
1686 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1687 if not ie_result.get('playlist_count'):
1688 # Better to do this after potentially exhausting entries
1689 ie_result['playlist_count'] = all_entries.get_full_count()
1690
1691 _infojson_written = False
1692 write_playlist_files = self.params.get('allow_playlist_files', True)
1693 if write_playlist_files and self.params.get('list_thumbnails'):
1694 self.list_thumbnails(ie_result)
1695 if write_playlist_files and not self.params.get('simulate'):
1696 ie_copy = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1697 _infojson_written = self._write_info_json(
1698 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1699 if _infojson_written is None:
1700 return
1701 if self._write_description('playlist', ie_result,
1702 self.prepare_filename(ie_copy, 'pl_description')) is None:
1703 return
1704 # TODO: This should be passed to ThumbnailsConvertor if necessary
1705 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1706
1707 if lazy:
1708 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1709 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1710 elif self.params.get('playlistreverse'):
1711 entries.reverse()
1712 elif self.params.get('playlistrandom'):
1713 random.shuffle(entries)
1714
1715 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
1716 f'{format_field(ie_result, "playlist_count", " of %s")}')
1717
1718 failures = 0
1719 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1720 for i, (playlist_index, entry) in enumerate(entries):
1721 if lazy:
1722 resolved_entries.append((playlist_index, entry))
1723
1724 # TODO: Add auto-generated fields
1725 if self._match_entry(entry, incomplete=True) is not None:
1726 continue
1727
1728 self.to_screen('[download] Downloading video %s of %s' % (
1729 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1730
1731 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
1732 if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
1733 playlist_index = ie_result['requested_entries'][i]
1734
1735 entry_result = self.__process_iterable_entry(entry, download, {
1736 'n_entries': int_or_none(n_entries),
1737 '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
1738 'playlist_count': ie_result.get('playlist_count'),
1739 'playlist_index': playlist_index,
1740 'playlist_autonumber': i + 1,
1741 'playlist': title,
1742 'playlist_id': ie_result.get('id'),
1743 'playlist_title': ie_result.get('title'),
1744 'playlist_uploader': ie_result.get('uploader'),
1745 'playlist_uploader_id': ie_result.get('uploader_id'),
1746 'extractor': ie_result['extractor'],
1747 'webpage_url': ie_result['webpage_url'],
1748 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1749 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1750 'extractor_key': ie_result['extractor_key'],
1751 })
1752 if not entry_result:
1753 failures += 1
1754 if failures >= max_failures:
1755 self.report_error(
1756 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
1757 break
1758 resolved_entries[i] = (playlist_index, entry_result)
1759
1760 # Update with processed data
1761 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1762
1763 # Write the updated info to json
1764 if _infojson_written is True and self._write_info_json(
1765 'updated playlist', ie_result,
1766 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1767 return
1768
1769 ie_result = self.run_all_pps('playlist', ie_result)
1770 self.to_screen(f'[download] Finished downloading playlist: {title}')
1771 return ie_result
1772
1773 @_handle_extraction_exceptions
1774 def __process_iterable_entry(self, entry, download, extra_info):
1775 return self.process_ie_result(
1776 entry, download=download, extra_info=extra_info)
1777
1778 def _build_format_filter(self, filter_spec):
1779 " Returns a function to filter the formats according to the filter_spec "
1780
1781 OPERATORS = {
1782 '<': operator.lt,
1783 '<=': operator.le,
1784 '>': operator.gt,
1785 '>=': operator.ge,
1786 '=': operator.eq,
1787 '!=': operator.ne,
1788 }
1789 operator_rex = re.compile(r'''(?x)\s*
1790 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1791 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1792 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1793 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1794 m = operator_rex.fullmatch(filter_spec)
1795 if m:
1796 try:
1797 comparison_value = int(m.group('value'))
1798 except ValueError:
1799 comparison_value = parse_filesize(m.group('value'))
1800 if comparison_value is None:
1801 comparison_value = parse_filesize(m.group('value') + 'B')
1802 if comparison_value is None:
1803 raise ValueError(
1804 'Invalid value %r in format specification %r' % (
1805 m.group('value'), filter_spec))
1806 op = OPERATORS[m.group('op')]
1807
1808 if not m:
1809 STR_OPERATORS = {
1810 '=': operator.eq,
1811 '^=': lambda attr, value: attr.startswith(value),
1812 '$=': lambda attr, value: attr.endswith(value),
1813 '*=': lambda attr, value: value in attr,
1814 '~=': lambda attr, value: value.search(attr) is not None
1815 }
1816 str_operator_rex = re.compile(r'''(?x)\s*
1817 (?P<key>[a-zA-Z0-9._-]+)\s*
1818 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1819 (?P<quote>["'])?
1820 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1821 (?(quote)(?P=quote))\s*
1822 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1823 m = str_operator_rex.fullmatch(filter_spec)
1824 if m:
1825 if m.group('op') == '~=':
1826 comparison_value = re.compile(m.group('value'))
1827 else:
1828 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1829 str_op = STR_OPERATORS[m.group('op')]
1830 if m.group('negation'):
1831 op = lambda attr, value: not str_op(attr, value)
1832 else:
1833 op = str_op
1834
1835 if not m:
1836 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1837
1838 def _filter(f):
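# Formats missing the field pass only if the '?' (none_inclusive) suffix was given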
1839 actual_value = f.get(m.group('key'))
1840 if actual_value is None:
1841 return m.group('none_inclusive')
1842 return op(actual_value, comparison_value)
1843 return _filter
1844
1845 def _check_formats(self, formats):
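# Weed out unplayable formats by test-downloading a small portion of each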
1846 for f in formats:
1847 self.to_screen('[info] Testing format %s' % f['format_id'])
1848 path = self.get_output_path('temp')
1849 if not self._ensure_dir_exists(f'{path}/'):
1850 continue
1851 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1852 temp_file.close()
1853 try:
1854 success, _ = self.dl(temp_file.name, f, test=True)
1855 except (DownloadError, OSError, ValueError) + network_exceptions:
1856 success = False
1857 finally:
1858 if os.path.exists(temp_file.name):
1859 try:
1860 os.remove(temp_file.name)
1861 except OSError:
1862 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1863 if success:
1864 yield f
1865 else:
1866 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1867
1868 def _default_format_spec(self, info_dict, download=True):
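# Pick a sensible default format spec: fall back to pre-merged 'best' when
# merging is not possible (no working ffmpeg, a live stream being downloaded
# from the current time, or output to stdout)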
1869
1870 def can_merge():
1871 merger = FFmpegMergerPP(self)
1872 return merger.available and merger.can_merge()
1873
1874 prefer_best = (
1875 not self.params.get('simulate')
1876 and download
1877 and (
1878 not can_merge()
1879 or info_dict.get('is_live') and not self.params.get('live_from_start')
1880 or self.params['outtmpl']['default'] == '-'))
1881 compat = (
1882 prefer_best
1883 or self.params.get('allow_multiple_audio_streams', False)
1884 or 'format-spec' in self.params['compat_opts'])
1885
1886 return (
1887 'best/bestvideo+bestaudio' if prefer_best
1888 else 'bestvideo*+bestaudio/best' if not compat
1889 else 'bestvideo+bestaudio/best')
1890
1891 def build_format_selector(self, format_spec):
1892 def syntax_error(note, start):
1893 message = (
1894 'Invalid format specification: '
1895 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
1896 return SyntaxError(message)
1897
1898 PICKFIRST = 'PICKFIRST'
1899 MERGE = 'MERGE'
1900 SINGLE = 'SINGLE'
1901 GROUP = 'GROUP'
1902 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
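# e.g. 'bv*+ba/b' parses to PICKFIRST(MERGE(SINGLE(bv*), SINGLE(ba)), SINGLE(b))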
1903
1904 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1905 'video': self.params.get('allow_multiple_video_streams', False)}
1906
1907 check_formats = self.params.get('check_formats') == 'selected'
1908
1909 def _parse_filter(tokens):
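# Consume tokens up to the matching ']' and return the raw filter string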
1910 filter_parts = []
1911 for type, string, start, _, _ in tokens:
1912 if type == tokenize.OP and string == ']':
1913 return ''.join(filter_parts)
1914 else:
1915 filter_parts.append(string)
1916
1917 def _remove_unused_ops(tokens):
1918 # Remove operators that we don't use and join them with the surrounding strings
1919 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1920 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1921 last_string, last_start, last_end, last_line = None, None, None, None
1922 for type, string, start, end, line in tokens:
1923 if type == tokenize.OP and string == '[':
1924 if last_string:
1925 yield tokenize.NAME, last_string, last_start, last_end, last_line
1926 last_string = None
1927 yield type, string, start, end, line
1928 # everything inside brackets will be handled by _parse_filter
1929 for type, string, start, end, line in tokens:
1930 yield type, string, start, end, line
1931 if type == tokenize.OP and string == ']':
1932 break
1933 elif type == tokenize.OP and string in ALLOWED_OPS:
1934 if last_string:
1935 yield tokenize.NAME, last_string, last_start, last_end, last_line
1936 last_string = None
1937 yield type, string, start, end, line
1938 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1939 if not last_string:
1940 last_string = string
1941 last_start = start
1942 last_end = end
1943 else:
1944 last_string += string
1945 if last_string:
1946 yield tokenize.NAME, last_string, last_start, last_end, last_line
1947
1948 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1949 selectors = []
1950 current_selector = None
1951 for type, string, start, _, _ in tokens:
1952 # tokenize.tokenize always emits an ENCODING token first; skip it
1953 if type == tokenize.ENCODING:
1954 continue
1955 elif type in [tokenize.NAME, tokenize.NUMBER]:
1956 current_selector = FormatSelector(SINGLE, string, [])
1957 elif type == tokenize.OP:
1958 if string == ')':
1959 if not inside_group:
1960 # ')' will be handled by the parentheses group
1961 tokens.restore_last_token()
1962 break
1963 elif inside_merge and string in ['/', ',']:
1964 tokens.restore_last_token()
1965 break
1966 elif inside_choice and string == ',':
1967 tokens.restore_last_token()
1968 break
1969 elif string == ',':
1970 if not current_selector:
1971 raise syntax_error('"," must follow a format selector', start)
1972 selectors.append(current_selector)
1973 current_selector = None
1974 elif string == '/':
1975 if not current_selector:
1976 raise syntax_error('"/" must follow a format selector', start)
1977 first_choice = current_selector
1978 second_choice = _parse_format_selection(tokens, inside_choice=True)
1979 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1980 elif string == '[':
1981 if not current_selector:
1982 current_selector = FormatSelector(SINGLE, 'best', [])
1983 format_filter = _parse_filter(tokens)
1984 current_selector.filters.append(format_filter)
1985 elif string == '(':
1986 if current_selector:
1987 raise syntax_error('Unexpected "("', start)
1988 group = _parse_format_selection(tokens, inside_group=True)
1989 current_selector = FormatSelector(GROUP, group, [])
1990 elif string == '+':
1991 if not current_selector:
1992 raise syntax_error('Unexpected "+"', start)
1993 selector_1 = current_selector
1994 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1995 if not selector_2:
1996 raise syntax_error('Expected a selector', start)
1997 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1998 else:
1999 raise syntax_error(f'Operator not recognized: "{string}"', start)
2000 elif type == tokenize.ENDMARKER:
2001 break
2002 if current_selector:
2003 selectors.append(current_selector)
2004 return selectors
2005
2006 def _merge(formats_pair):
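# Merge a (format, format) pair into a single synthetic dict with 'requested_formats' set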
2007 format_1, format_2 = formats_pair
2008
2009 formats_info = []
2010 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2011 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2012
2013 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2014 get_no_more = {'video': False, 'audio': False}
2015 for fmt_info in list(formats_info):  # iterate over a copy; items are removed below
2016 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2017 formats_info.remove(fmt_info)
2018 continue
2019 for aud_vid in ['audio', 'video']:
2020 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2021 if get_no_more[aud_vid]:
2022 formats_info.remove(fmt_info)
2023 break
2024 get_no_more[aud_vid] = True
2025
2026 if len(formats_info) == 1:
2027 return formats_info[0]
2028
2029 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2030 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2031
2032 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2033 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2034
2035 output_ext = self.params.get('merge_output_format')
2036 if not output_ext:
2037 if the_only_video:
2038 output_ext = the_only_video['ext']
2039 elif the_only_audio and not video_fmts:
2040 output_ext = the_only_audio['ext']
2041 else:
2042 output_ext = 'mkv'
2043
2044 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2045
2046 new_dict = {
2047 'requested_formats': formats_info,
2048 'format': '+'.join(filtered('format')),
2049 'format_id': '+'.join(filtered('format_id')),
2050 'ext': output_ext,
2051 'protocol': '+'.join(map(determine_protocol, formats_info)),
2052 'language': '+'.join(orderedSet(filtered('language'))) or None,
2053 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2054 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2055 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2056 }
2057
2058 if the_only_video:
2059 new_dict.update({
2060 'width': the_only_video.get('width'),
2061 'height': the_only_video.get('height'),
2062 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2063 'fps': the_only_video.get('fps'),
2064 'dynamic_range': the_only_video.get('dynamic_range'),
2065 'vcodec': the_only_video.get('vcodec'),
2066 'vbr': the_only_video.get('vbr'),
2067 'stretched_ratio': the_only_video.get('stretched_ratio'),
2068 })
2069
2070 if the_only_audio:
2071 new_dict.update({
2072 'acodec': the_only_audio.get('acodec'),
2073 'abr': the_only_audio.get('abr'),
2074 'asr': the_only_audio.get('asr'),
2075 })
2076
2077 return new_dict
2078
2079 def _check_formats(formats):
2080 if not check_formats:
2081 yield from formats
2082 return
2083 yield from self._check_formats(formats)
2084
2085 def _build_selector_function(selector):
2086 if isinstance(selector, list): # ,
2087 fs = [_build_selector_function(s) for s in selector]
2088
2089 def selector_function(ctx):
2090 for f in fs:
2091 yield from f(ctx)
2092 return selector_function
2093
2094 elif selector.type == GROUP: # ()
2095 selector_function = _build_selector_function(selector.selector)
2096
2097 elif selector.type == PICKFIRST: # /
2098 fs = [_build_selector_function(s) for s in selector.selector]
2099
2100 def selector_function(ctx):
2101 for f in fs:
2102 picked_formats = list(f(ctx))
2103 if picked_formats:
2104 return picked_formats
2105 return []
2106
2107 elif selector.type == MERGE: # +
2108 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2109
2110 def selector_function(ctx):
2111 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2112 yield _merge(pair)
2113
2114 elif selector.type == SINGLE: # atom
2115 format_spec = selector.selector or 'best'
2116
2117 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2118 if format_spec == 'all':
2119 def selector_function(ctx):
2120 yield from _check_formats(ctx['formats'][::-1])
2121 elif format_spec == 'mergeall':
2122 def selector_function(ctx):
2123 formats = list(_check_formats(
2124 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2125 if not formats:
2126 return
2127 merged_format = formats[-1]
2128 for f in formats[-2::-1]:
2129 merged_format = _merge((merged_format, f))
2130 yield merged_format
2131
2132 else:
2133 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2134 mobj = re.match(
2135 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2136 format_spec)
2137 if mobj is not None:
2138 format_idx = int_or_none(mobj.group('n'), default=1)
2139 format_reverse = mobj.group('bw')[0] == 'b'
2140 format_type = (mobj.group('type') or [None])[0]
2141 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2142 format_modified = mobj.group('mod') is not None
2143
2144 format_fallback = not format_type and not format_modified # for b, w
2145 _filter_f = (
2146 (lambda f: f.get('%scodec' % format_type) != 'none')
2147 if format_type and format_modified # bv*, ba*, wv*, wa*
2148 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2149 if format_type # bv, ba, wv, wa
2150 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2151 if not format_modified # b, w
2152 else lambda f: True) # b*, w*
2153 filter_f = lambda f: _filter_f(f) and (
2154 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2155 else:
2156 if format_spec in self._format_selection_exts['audio']:
2157 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2158 elif format_spec in self._format_selection_exts['video']:
2159 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2160 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2161 elif format_spec in self._format_selection_exts['storyboards']:
2162 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2163 else:
2164 filter_f = lambda f: f.get('format_id') == format_spec # id
2165
2166 def selector_function(ctx):
2167 formats = list(ctx['formats'])
2168 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2169 if not matches:
2170 if format_fallback and ctx['incomplete_formats']:
2171 # for extractors with incomplete formats (audio only (soundcloud)
2172 # or video only (imgur)) best/worst will fallback to
2173 # best/worst {video,audio}-only format
2174 matches = formats
2175 elif separate_fallback and not ctx['has_merged_format']:
2176 # for compatibility with youtube-dl when there is no pre-merged format
2177 matches = list(filter(separate_fallback, formats))
2178 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2179 try:
2180 yield matches[format_idx - 1]
2181 except LazyList.IndexError:
2182 return
2183
2184 filters = [self._build_format_filter(f) for f in selector.filters]
2185
2186 def final_selector(ctx):
2187 ctx_copy = dict(ctx)
2188 for _filter in filters:
2189 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2190 return selector_function(ctx_copy)
2191 return final_selector
2192
2193 stream = io.BytesIO(format_spec.encode())
2194 try:
2195 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2196 except tokenize.TokenError:
2197 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2198
2199 class TokenIterator:
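# Token stream with single-token pushback for the recursive-descent parser above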
2200 def __init__(self, tokens):
2201 self.tokens = tokens
2202 self.counter = 0
2203
2204 def __iter__(self):
2205 return self
2206
2207 def __next__(self):
2208 if self.counter >= len(self.tokens):
2209 raise StopIteration()
2210 value = self.tokens[self.counter]
2211 self.counter += 1
2212 return value
2213
2214 next = __next__
2215
2216 def restore_last_token(self):
2217 self.counter -= 1
2218
2219 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2220 return _build_selector_function(parsed_selector)
2221
2222 def _calc_headers(self, info_dict):
2223 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2224
2225 cookies = self._calc_cookies(info_dict['url'])
2226 if cookies:
2227 res['Cookie'] = cookies
2228
2229 if 'X-Forwarded-For' not in res:
2230 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2231 if x_forwarded_for_ip:
2232 res['X-Forwarded-For'] = x_forwarded_for_ip
2233
2234 return res
2235
2236 def _calc_cookies(self, url):
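# Compute the Cookie header by letting the cookiejar process a dummy request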
2237 pr = sanitized_Request(url)
2238 self.cookiejar.add_cookie_header(pr)
2239 return pr.get_header('Cookie')
2240
2241 def _sort_thumbnails(self, thumbnails):
2242 thumbnails.sort(key=lambda t: (
2243 t.get('preference') if t.get('preference') is not None else -1,
2244 t.get('width') if t.get('width') is not None else -1,
2245 t.get('height') if t.get('height') is not None else -1,
2246 t.get('id') if t.get('id') is not None else '',
2247 t.get('url')))
2248
2249 def _sanitize_thumbnails(self, info_dict):
2250 thumbnails = info_dict.get('thumbnails')
2251 if thumbnails is None:
2252 thumbnail = info_dict.get('thumbnail')
2253 if thumbnail:
2254 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2255 if not thumbnails:
2256 return
2257
2258 def check_thumbnails(thumbnails):
2259 for t in thumbnails:
2260 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2261 try:
2262 self.urlopen(HEADRequest(t['url']))
2263 except network_exceptions as err:
2264 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2265 continue
2266 yield t
2267
2268 self._sort_thumbnails(thumbnails)
2269 for i, t in enumerate(thumbnails):
2270 if t.get('id') is None:
2271 t['id'] = '%d' % i
2272 if t.get('width') and t.get('height'):
2273 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2274 t['url'] = sanitize_url(t['url'])
2275
2276 if self.params.get('check_formats') is True:
2277 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2278 else:
2279 info_dict['thumbnails'] = thumbnails
2280
2281 def _fill_common_fields(self, info_dict, is_video=True):
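# Derive common fields from others: duration_string, dates from
# timestamps, live_status, and generic chapter/season/episode titles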
2282 # TODO: move sanitization here
2283 if is_video:
2284 # playlists are allowed to lack "title"
2285 title = info_dict.get('title', NO_DEFAULT)
2286 if title is NO_DEFAULT:
2287 raise ExtractorError('Missing "title" field in extractor result',
2288 video_id=info_dict['id'], ie=info_dict['extractor'])
2289 info_dict['fulltitle'] = title
2290 if not title:
2291 if title == '':
2292 self.write_debug('Extractor gave empty title. Creating a generic title')
2293 else:
2294 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2295 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2296
2297 if info_dict.get('duration') is not None:
2298 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2299
2300 for ts_key, date_key in (
2301 ('timestamp', 'upload_date'),
2302 ('release_timestamp', 'release_date'),
2303 ('modified_timestamp', 'modified_date'),
2304 ):
2305 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2306 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2307 # see http://bugs.python.org/issue1646728)
2308 with contextlib.suppress(ValueError, OverflowError, OSError):
2309 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2310 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2311
2312 live_keys = ('is_live', 'was_live')
2313 live_status = info_dict.get('live_status')
2314 if live_status is None:
2315 for key in live_keys:
2316 if info_dict.get(key) is False:
2317 continue
2318 if info_dict.get(key):
2319 live_status = key
2320 break
2321 if all(info_dict.get(key) is False for key in live_keys):
2322 live_status = 'not_live'
2323 if live_status:
2324 info_dict['live_status'] = live_status
2325 for key in live_keys:
2326 if info_dict.get(key) is None:
2327 info_dict[key] = (live_status == key)
2328
2329 # Auto generate title fields corresponding to the *_number fields when missing
2330 # in order to always have clean titles. This is very common for TV series.
2331 for field in ('chapter', 'season', 'episode'):
2332 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2333 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2334
2335 def _raise_pending_errors(self, info):
2336 err = info.pop('__pending_error', None)
2337 if err:
2338 self.report_error(err, tb=False)
2339
2340 def process_video_result(self, info_dict, download=True):
2341 assert info_dict.get('_type', 'video') == 'video'
2342 self._num_videos += 1
2343
2344 if 'id' not in info_dict:
2345 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2346 elif not info_dict.get('id'):
2347 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2348
2349 def report_force_conversion(field, field_not, conversion):
2350 self.report_warning(
2351 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2352 % (field, field_not, conversion))
2353
2354 def sanitize_string_field(info, string_field):
2355 field = info.get(string_field)
2356 if field is None or isinstance(field, compat_str):
2357 return
2358 report_force_conversion(string_field, 'a string', 'string')
2359 info[string_field] = compat_str(field)
2360
2361 def sanitize_numeric_fields(info):
2362 for numeric_field in self._NUMERIC_FIELDS:
2363 field = info.get(numeric_field)
2364 if field is None or isinstance(field, (int, float)):
2365 continue
2366 report_force_conversion(numeric_field, 'numeric', 'int')
2367 info[numeric_field] = int_or_none(field)
2368
2369 sanitize_string_field(info_dict, 'id')
2370 sanitize_numeric_fields(info_dict)
2371 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2372 self.report_warning('"duration" field is negative, there is an error in extractor')
2373
2374 if 'playlist' not in info_dict:
2375 # It isn't part of a playlist
2376 info_dict['playlist'] = None
2377 info_dict['playlist_index'] = None
2378
2379 self._sanitize_thumbnails(info_dict)
2380
2381 thumbnail = info_dict.get('thumbnail')
2382 thumbnails = info_dict.get('thumbnails')
2383 if thumbnail:
2384 info_dict['thumbnail'] = sanitize_url(thumbnail)
2385 elif thumbnails:
2386 info_dict['thumbnail'] = thumbnails[-1]['url']
2387
2388 if info_dict.get('display_id') is None and 'id' in info_dict:
2389 info_dict['display_id'] = info_dict['id']
2390
2391 self._fill_common_fields(info_dict)
2392
2393 for cc_kind in ('subtitles', 'automatic_captions'):
2394 cc = info_dict.get(cc_kind)
2395 if cc:
2396 for _, subtitle in cc.items():
2397 for subtitle_format in subtitle:
2398 if subtitle_format.get('url'):
2399 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2400 if subtitle_format.get('ext') is None:
2401 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2402
2403 automatic_captions = info_dict.get('automatic_captions')
2404 subtitles = info_dict.get('subtitles')
2405
2406 info_dict['requested_subtitles'] = self.process_subtitles(
2407 info_dict['id'], subtitles, automatic_captions)
2408
2409 if info_dict.get('formats') is None:
2410 # There's only one format available
2411 formats = [info_dict]
2412 else:
2413 formats = info_dict['formats']
2414
2415 # or None ensures --clean-infojson removes it
2416 info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
2417 if not self.params.get('allow_unplayable_formats'):
2418 formats = [f for f in formats if not f.get('has_drm')]
2419 if info_dict['_has_drm'] and all(
2420 f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2421 self.report_warning(
2422 'This video is DRM protected and only images are available for download. '
2423 'Use --list-formats to see them')
2424
2425 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2426 if not get_from_start:
2427 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2428 if info_dict.get('is_live') and formats:
2429 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2430 if get_from_start and not formats:
2431 self.raise_no_formats(info_dict, msg=(
2432 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2433 'If you want to download from the current time, use --no-live-from-start'))
2434
2435 if not formats:
2436 self.raise_no_formats(info_dict)
2437
2438 def is_wellformed(f):
2439 url = f.get('url')
2440 if not url:
2441 self.report_warning(
2442 '"url" field is missing or empty - skipping format, '
2443 'there is an error in extractor')
2444 return False
2445 if isinstance(url, bytes):
2446 sanitize_string_field(f, 'url')
2447 return True
2448
2449 # Filter out malformed formats for better extraction robustness
2450 formats = list(filter(is_wellformed, formats))
2451
2452 formats_dict = {}
2453
2454 # We check that all the formats have the format and format_id fields
2455 for i, format in enumerate(formats):
2456 sanitize_string_field(format, 'format_id')
2457 sanitize_numeric_fields(format)
2458 format['url'] = sanitize_url(format['url'])
2459 if not format.get('format_id'):
2460 format['format_id'] = compat_str(i)
2461 else:
2462 # Sanitize format_id from characters used in format selector expression
2463 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2464 format_id = format['format_id']
2465 if format_id not in formats_dict:
2466 formats_dict[format_id] = []
2467 formats_dict[format_id].append(format)
2468
2469 # Make sure all formats have unique format_id
2470 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2471 for format_id, ambiguous_formats in formats_dict.items():
2472 ambiguous_id = len(ambiguous_formats) > 1
2473 for i, format in enumerate(ambiguous_formats):
2474 if ambiguous_id:
2475 format['format_id'] = '%s-%d' % (format_id, i)
2476 if format.get('ext') is None:
2477 format['ext'] = determine_ext(format['url']).lower()
2478 # Ensure there is no conflict between id and ext in format selection
2479 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2480 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2481 format['format_id'] = 'f%s' % format['format_id']
2482
2483 for i, format in enumerate(formats):
2484 if format.get('format') is None:
2485 format['format'] = '{id} - {res}{note}'.format(
2486 id=format['format_id'],
2487 res=self.format_resolution(format),
2488 note=format_field(format, 'format_note', ' (%s)'),
2489 )
2490 if format.get('protocol') is None:
2491 format['protocol'] = determine_protocol(format)
2492 if format.get('resolution') is None:
2493 format['resolution'] = self.format_resolution(format, default=None)
2494 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2495 format['dynamic_range'] = 'SDR'
2496 if (info_dict.get('duration') and format.get('tbr')
2497 and not format.get('filesize') and not format.get('filesize_approx')):
2498 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
2499
2500 # Add HTTP headers, so that external programs can use them from the
2501 # json output
2502 full_format_info = info_dict.copy()
2503 full_format_info.update(format)
2504 format['http_headers'] = self._calc_headers(full_format_info)
2505 # Remove private housekeeping stuff
2506 if '__x_forwarded_for_ip' in info_dict:
2507 del info_dict['__x_forwarded_for_ip']
2508
2509 if self.params.get('check_formats') is True:
2510 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2511
2512 if not formats or formats[0] is not info_dict:
2513 # Only set the 'formats' field if the original info_dict lists them;
2514 # otherwise we end up with a circular reference: the first (and only)
2515 # element of the 'formats' field in info_dict would be info_dict itself,
2516 # which can't be exported to json
2517 info_dict['formats'] = formats
2518
2519 info_dict, _ = self.pre_process(info_dict)
2520
2521 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2522 return info_dict
2523
2524 self.post_extract(info_dict)
2525 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2526
2527 # The pre-processors may have modified the formats
2528 formats = info_dict.get('formats', [info_dict])
2529
2530 list_only = self.params.get('simulate') is None and (
2531 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2532 interactive_format_selection = not list_only and self.format_selector == '-'
2533 if self.params.get('list_thumbnails'):
2534 self.list_thumbnails(info_dict)
2535 if self.params.get('listsubtitles'):
2536 if 'automatic_captions' in info_dict:
2537 self.list_subtitles(
2538 info_dict['id'], automatic_captions, 'automatic captions')
2539 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2540 if self.params.get('listformats') or interactive_format_selection:
2541 self.list_formats(info_dict)
2542 if list_only:
2543 # Without this printing, -F --print-json will not work
2544 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2545 return info_dict
2546
2547 format_selector = self.format_selector
2548 if format_selector is None:
2549 req_format = self._default_format_spec(info_dict, download=download)
2550 self.write_debug('Default format spec: %s' % req_format)
2551 format_selector = self.build_format_selector(req_format)
2552
2553 while True:
2554 if interactive_format_selection:
2555 req_format = input(
2556 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2557 try:
2558 format_selector = self.build_format_selector(req_format)
2559 except SyntaxError as err:
2560 self.report_error(err, tb=False, is_error=False)
2561 continue
2562
2563 formats_to_download = list(format_selector({
2564 'formats': formats,
2565 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2566 'incomplete_formats': (
2567 # All formats are video-only or
2568 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2569 # all formats are audio-only
2570 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2571 }))
2572 if interactive_format_selection and not formats_to_download:
2573 self.report_error('Requested format is not available', tb=False, is_error=False)
2574 continue
2575 break
2576
2577 if not formats_to_download:
2578 if not self.params.get('ignore_no_formats_error'):
2579 raise ExtractorError(
2580 'Requested format is not available. Use --list-formats for a list of available formats',
2581 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2582 self.report_warning('Requested format is not available')
2583 # Process what we can, even without any available formats.
2584 formats_to_download = [{}]
2585
2586 requested_ranges = self.params.get('download_ranges')
2587 if requested_ranges:
2588 requested_ranges = tuple(requested_ranges(info_dict, self))
2589
2590 best_format, downloaded_formats = formats_to_download[-1], []
2591 if download:
2592 if best_format:
2593 def to_screen(*msg):
2594 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2595
2596 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2597 (f['format_id'] for f in formats_to_download))
2598 if requested_ranges:
2599 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2600 (f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))
2601 max_downloads_reached = False
2602
2603 for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):
2604 new_info = self._copy_infodict(info_dict)
2605 new_info.update(fmt)
2606 if chapter:
2607 new_info.update({
2608 'section_start': chapter.get('start_time'),
2609 'section_end': chapter.get('end_time', 0),
2610 'section_title': chapter.get('title'),
2611 'section_number': chapter.get('index'),
2612 })
2613 downloaded_formats.append(new_info)
2614 try:
2615 self.process_info(new_info)
2616 except MaxDownloadsReached:
2617 max_downloads_reached = True
2618 self._raise_pending_errors(new_info)
2619 # Remove copied info
2620 for key, val in tuple(new_info.items()):
2621 if info_dict.get(key) == val:
2622 new_info.pop(key)
2623 if max_downloads_reached:
2624 break
2625
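# Record in the archive only if at least one format opted in and none opted out
# ('ignore' entries are neutral)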
2626 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2627 assert write_archive.issubset({True, False, 'ignore'})
2628 if True in write_archive and False not in write_archive:
2629 self.record_download_archive(info_dict)
2630
2631 info_dict['requested_downloads'] = downloaded_formats
2632 info_dict = self.run_all_pps('after_video', info_dict)
2633 if max_downloads_reached:
2634 raise MaxDownloadsReached()
2635
2636 # We update the info dict with the selected best quality format (backwards compatibility)
2637 info_dict.update(best_format)
2638 return info_dict
2639
2640 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2641 """Select the requested subtitles and their format"""
2642 available_subs, normal_sub_langs = {}, []
2643 if normal_subtitles and self.params.get('writesubtitles'):
2644 available_subs.update(normal_subtitles)
2645 normal_sub_langs = tuple(normal_subtitles.keys())
2646 if automatic_captions and self.params.get('writeautomaticsub'):
2647 for lang, cap_info in automatic_captions.items():
2648 if lang not in available_subs:
2649 available_subs[lang] = cap_info
2650
2651 if ((not self.params.get('writesubtitles')
2652 and not self.params.get('writeautomaticsub'))
2653 or not available_subs):
2654 return None
2655
2656 all_sub_langs = tuple(available_subs.keys())
2657 if self.params.get('allsubtitles', False):
2658 requested_langs = all_sub_langs
2659 elif self.params.get('subtitleslangs', False):
2660 # A list is used so that the order of languages will be the same as
2661 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2662 requested_langs = []
2663 for lang_re in self.params.get('subtitleslangs'):
2664 discard = lang_re[0] == '-'
2665 if discard:
2666 lang_re = lang_re[1:]
2667 if lang_re == 'all':
2668 if discard:
2669 requested_langs = []
2670 else:
2671 requested_langs.extend(all_sub_langs)
2672 continue
2673 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2674 if discard:
2675 for lang in current_langs:
2676 while lang in requested_langs:
2677 requested_langs.remove(lang)
2678 else:
2679 requested_langs.extend(current_langs)
2680 requested_langs = orderedSet(requested_langs)
2681 elif normal_sub_langs:
2682 requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
2683 else:
2684 requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
2685 if requested_langs:
2686 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2687
2688 formats_query = self.params.get('subtitlesformat', 'best')
2689 formats_preference = formats_query.split('/') if formats_query else []
2690 subs = {}
2691 for lang in requested_langs:
2692 formats = available_subs.get(lang)
2693 if formats is None:
2694 self.report_warning(f'{lang} subtitles not available for {video_id}')
2695 continue
2696 for ext in formats_preference:
2697 if ext == 'best':
2698 f = formats[-1]
2699 break
2700 matches = list(filter(lambda f: f['ext'] == ext, formats))
2701 if matches:
2702 f = matches[-1]
2703 break
2704 else:
2705 f = formats[-1]
2706 self.report_warning(
2707 'No subtitle format found matching "%s" for language %s, '
2708 'using %s' % (formats_query, lang, f['ext']))
2709 subs[lang] = f
2710 return subs
2711
2712 def _forceprint(self, key, info_dict):
2713 if info_dict is None:
2714 return
2715 info_copy = info_dict.copy()
2716 info_copy['formats_table'] = self.render_formats_table(info_dict)
2717 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2718 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2719 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2720
2721 def format_tmpl(tmpl):
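# 'field=' prints 'field = <repr>', a bare 'field' prints its value,
# anything else is treated as a full output template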
2722 mobj = re.match(r'\w+(=?)$', tmpl)
2723 if mobj and mobj.group(1):
2724 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2725 elif mobj:
2726 return f'%({tmpl})s'
2727 return tmpl
2728
2729 for tmpl in self.params['forceprint'].get(key, []):
2730 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2731
2732 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2733 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
2734 tmpl = format_tmpl(tmpl)
2735 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2736 if self._ensure_dir_exists(filename):
2737 with open(filename, 'a', encoding='utf-8') as f:
2738 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2739
2740 def __forced_printings(self, info_dict, filename, incomplete):
2741 def print_mandatory(field, actual_field=None):
2742 if actual_field is None:
2743 actual_field = field
2744 if (self.params.get('force%s' % field, False)
2745 and (not incomplete or info_dict.get(actual_field) is not None)):
2746 self.to_stdout(info_dict[actual_field])
2747
2748 def print_optional(field):
2749 if (self.params.get('force%s' % field, False)
2750 and info_dict.get(field) is not None):
2751 self.to_stdout(info_dict[field])
2752
2753 info_dict = info_dict.copy()
2754 if filename is not None:
2755 info_dict['filename'] = filename
2756 if info_dict.get('requested_formats') is not None:
2757 # For RTMP URLs, also include the playpath
2758 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2759 elif info_dict.get('url'):
2760 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2761
2762 if (self.params.get('forcejson')
2763 or self.params['forceprint'].get('video')
2764 or self.params['print_to_file'].get('video')):
2765 self.post_extract(info_dict)
2766 self._forceprint('video', info_dict)
2767
2768 print_mandatory('title')
2769 print_mandatory('id')
2770 print_mandatory('url', 'urls')
2771 print_optional('thumbnail')
2772 print_optional('description')
2773 print_optional('filename')
2774 if self.params.get('forceduration') and info_dict.get('duration') is not None:
2775 self.to_stdout(formatSeconds(info_dict['duration']))
2776 print_mandatory('format')
2777
2778 if self.params.get('forcejson'):
2779 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2780
2781 def dl(self, name, info, subtitle=False, test=False):
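# Download 'name' with a suitable downloader; test=True fetches only a
# small portion to check that the format is actually downloadable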
2782 if not info.get('url'):
2783 self.raise_no_formats(info, True)
2784
2785 if test:
2786 verbose = self.params.get('verbose')
2787 params = {
2788 'test': True,
2789 'quiet': self.params.get('quiet') or not verbose,
2790 'verbose': verbose,
2791 'noprogress': not verbose,
2792 'nopart': True,
2793 'skip_unavailable_fragments': False,
2794 'keep_fragments': False,
2795 'overwrites': True,
2796 '_no_ytdl_file': True,
2797 }
2798 else:
2799 params = self.params
2800 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2801 if not test:
2802 for ph in self._progress_hooks:
2803 fd.add_progress_hook(ph)
2804 urls = '", "'.join(
2805 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2806 for f in info.get('requested_formats', []) or [info])
2807 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
2808
2809 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
2810 # But it may contain objects that are not deep-copyable
2811 new_info = self._copy_infodict(info)
2812 if new_info.get('http_headers') is None:
2813 new_info['http_headers'] = self._calc_headers(new_info)
2814 return fd.download(name, new_info, subtitle)
2815
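# Illustrative sketch (assumed usage): hooks registered through the
# 'progress_hooks' param are the ones attached to the downloader by dl()
# above:
#
#   def hook(d):
#       if d['status'] == 'finished':
#           print('Downloaded', d.get('filename'))
#
#   ydl = YoutubeDL({'progress_hooks': [hook]})
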
2816 def existing_file(self, filepaths, *, default_overwrite=True):
2817 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2818 if existing_files and not self.params.get('overwrites', default_overwrite):
2819 return existing_files[0]
2820
2821 for file in existing_files:
2822 self.report_file_delete(file)
2823 os.remove(file)
2824 return None
2825
2826 def process_info(self, info_dict):
2827 """Process a single resolved IE result. (Modifies it in-place)"""
2828
2829 assert info_dict.get('_type', 'video') == 'video'
2830 original_infodict = info_dict
2831
2832 if 'format' not in info_dict and 'ext' in info_dict:
2833 info_dict['format'] = info_dict['ext']
2834
2835 # This is mostly just for backward compatibility of process_info
2836 # As a side-effect, this allows for format-specific filters
2837 if self._match_entry(info_dict) is not None:
2838 info_dict['__write_download_archive'] = 'ignore'
2839 return
2840
2841 # Does nothing under normal operation - for backward compatibility of process_info
2842 self.post_extract(info_dict)
2843 self._num_downloads += 1
2844
2845 # info_dict['_filename'] needs to be set for backward compatibility
2846 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2847 temp_filename = self.prepare_filename(info_dict, 'temp')
2848 files_to_move = {}
2849
2850 # Forced printings
2851 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2852
2853 def check_max_downloads():
2854 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
2855 raise MaxDownloadsReached()
2856
2857 if self.params.get('simulate'):
2858 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2859 check_max_downloads()
2860 return
2861
2862 if full_filename is None:
2863 return
2864 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2865 return
2866 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2867 return
2868
2869 if self._write_description('video', info_dict,
2870 self.prepare_filename(info_dict, 'description')) is None:
2871 return
2872
2873 sub_files = self._write_subtitles(info_dict, temp_filename)
2874 if sub_files is None:
2875 return
2876 files_to_move.update(dict(sub_files))
2877
2878 thumb_files = self._write_thumbnails(
2879 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2880 if thumb_files is None:
2881 return
2882 files_to_move.update(dict(thumb_files))
2883
2884 infofn = self.prepare_filename(info_dict, 'infojson')
2885 _infojson_written = self._write_info_json('video', info_dict, infofn)
2886 if _infojson_written:
2887 info_dict['infojson_filename'] = infofn
2888 # For backward compatibility, even though it was a private field
2889 info_dict['__infojson_filename'] = infofn
2890 elif _infojson_written is None:
2891 return
2892
2893 # Note: Annotations are deprecated
2894 annofn = None
2895 if self.params.get('writeannotations', False):
2896 annofn = self.prepare_filename(info_dict, 'annotation')
2897 if annofn:
2898 if not self._ensure_dir_exists(encodeFilename(annofn)):
2899 return
2900 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2901 self.to_screen('[info] Video annotations are already present')
2902 elif not info_dict.get('annotations'):
2903 self.report_warning('There are no annotations to write.')
2904 else:
2905 try:
2906 self.to_screen('[info] Writing video annotations to: ' + annofn)
2907 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2908 annofile.write(info_dict['annotations'])
2909 except (KeyError, TypeError):
2910 self.report_warning('There are no annotations to write.')
2911 except OSError:
2912 self.report_error('Cannot write annotations file: ' + annofn)
2913 return
2914
2915 # Write internet shortcut files
2916 def _write_link_file(link_type):
2917 url = try_get(info_dict['webpage_url'], iri_to_uri)
2918 if not url:
2919 self.report_warning(
2920 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2921 return True
2922 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2923 if not self._ensure_dir_exists(encodeFilename(linkfn)):
2924 return False
2925 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2926 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2927 return True
2928 try:
2929 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2930 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2931 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2932 template_vars = {'url': url}
2933 if link_type == 'desktop':
2934 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2935 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2936 except OSError:
2937 self.report_error(f'Cannot write internet shortcut {linkfn}')
2938 return False
2939 return True
2940
2941 write_links = {
2942 'url': self.params.get('writeurllink'),
2943 'webloc': self.params.get('writewebloclink'),
2944 'desktop': self.params.get('writedesktoplink'),
2945 }
2946 if self.params.get('writelink'):
2947 link_type = ('webloc' if sys.platform == 'darwin'
2948 else 'desktop' if sys.platform.startswith('linux')
2949 else 'url')
2950 write_links[link_type] = True
2951
2952 if any(should_write and not _write_link_file(link_type)
2953 for link_type, should_write in write_links.items()):
2954 return
2955
2956 def replace_info_dict(new_info):
2957 nonlocal info_dict
2958 if new_info == info_dict:
2959 return
2960 info_dict.clear()
2961 info_dict.update(new_info)
2962
2963 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2964 replace_info_dict(new_info)
2965
2966 if self.params.get('skip_download'):
2967 info_dict['filepath'] = temp_filename
2968 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2969 info_dict['__files_to_move'] = files_to_move
2970 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2971 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2972 else:
2973 # Download
2974 info_dict.setdefault('__postprocessors', [])
2975 try:
2976
2977 def existing_video_file(*filepaths):
2978 ext = info_dict.get('ext')
2979 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
2980 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
2981 default_overwrite=False)
2982 if file:
2983 info_dict['ext'] = os.path.splitext(file)[1][1:]
2984 return file
2985
2986 success = True
2987 merger, fd = FFmpegMergerPP(self), None
2988 if info_dict.get('protocol') or info_dict.get('url'):
2989 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
2990 if fd is not FFmpegFD and (
2991 info_dict.get('section_start') or info_dict.get('section_end')):
2992 msg = ('This format cannot be partially downloaded' if merger.available
2993 else 'You have requested downloading the video partially, but ffmpeg is not installed')
2994 self.report_error(f'{msg}. Aborting')
2995 return
2996
2997 if info_dict.get('requested_formats') is not None:
2998
2999 def compatible_formats(formats):
3000 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3001 video_formats = [format for format in formats if format.get('vcodec') != 'none']
3002 audio_formats = [format for format in formats if format.get('acodec') != 'none']
3003 if len(video_formats) > 2 or len(audio_formats) > 2:
3004 return False
3005
3006 # Check extension
3007 exts = {format.get('ext') for format in formats}
3008 COMPATIBLE_EXTS = (
3009 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
3010 {'webm'},
3011 )
3012 for ext_sets in COMPATIBLE_EXTS:
3013 if ext_sets.issuperset(exts):
3014 return True
3015 # TODO: Check acodec/vcodec
3016 return False
3017
3018 requested_formats = info_dict['requested_formats']
3019 old_ext = info_dict['ext']
3020 if self.params.get('merge_output_format') is None:
3021 if not compatible_formats(requested_formats):
3022 info_dict['ext'] = 'mkv'
3023 self.report_warning(
3024 'Requested formats are incompatible for merge and will be merged into mkv')
3025 if (info_dict['ext'] == 'webm'
3026 and info_dict.get('thumbnails')
3027 # check with type instead of pp_key, __name__, or isinstance
3028 # since we don't want any custom PPs to trigger this
3029 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
3030 info_dict['ext'] = 'mkv'
3031 self.report_warning(
3032 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3033 new_ext = info_dict['ext']
3034
3035 def correct_ext(filename, ext=new_ext):
3036 if filename == '-':
3037 return filename
3038 filename_real_ext = os.path.splitext(filename)[1][1:]
3039 filename_wo_ext = (
3040 os.path.splitext(filename)[0]
3041 if filename_real_ext in (old_ext, new_ext)
3042 else filename)
3043 return f'{filename_wo_ext}.{ext}'
3044
3045 # Ensure filename always has a correct extension for successful merge
3046 full_filename = correct_ext(full_filename)
3047 temp_filename = correct_ext(temp_filename)
3048 dl_filename = existing_video_file(full_filename, temp_filename)
3049 info_dict['__real_download'] = False
3050
3051 downloaded = []
3052 if dl_filename is not None:
3053 self.report_file_already_downloaded(dl_filename)
3054 elif fd:
3055 for f in requested_formats if fd != FFmpegFD else []:
3056 f['filepath'] = fname = prepend_extension(
3057 correct_ext(temp_filename, info_dict['ext']),
3058 'f%s' % f['format_id'], info_dict['ext'])
3059 downloaded.append(fname)
3060 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3061 success, real_download = self.dl(temp_filename, info_dict)
3062 info_dict['__real_download'] = real_download
3063 else:
3064 if self.params.get('allow_unplayable_formats'):
3065 self.report_warning(
3066 'You have requested merging of multiple formats '
3067 'while also allowing unplayable formats to be downloaded. '
3068 'The formats won\'t be merged to prevent data corruption.')
3069 elif not merger.available:
3070 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3071 if not self.params.get('ignoreerrors'):
3072 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3073 return
3074 self.report_warning(f'{msg}. The formats won\'t be merged')
3075
3076 if temp_filename == '-':
3077 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3078 else 'but the formats are incompatible for simultaneous download' if merger.available
3079 else 'but ffmpeg is not installed')
3080 self.report_warning(
3081 f'You have requested downloading multiple formats to stdout {reason}. '
3082 'The formats will be streamed one after the other')
3083 fname = temp_filename
3084 for f in requested_formats:
3085 new_info = dict(info_dict)
3086 del new_info['requested_formats']
3087 new_info.update(f)
3088 if temp_filename != '-':
3089 fname = prepend_extension(
3090 correct_ext(temp_filename, new_info['ext']),
3091 'f%s' % f['format_id'], new_info['ext'])
3092 if not self._ensure_dir_exists(fname):
3093 return
3094 f['filepath'] = fname
3095 downloaded.append(fname)
3096 partial_success, real_download = self.dl(fname, new_info)
3097 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3098 success = success and partial_success
3099
3100 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3101 info_dict['__postprocessors'].append(merger)
3102 info_dict['__files_to_merge'] = downloaded
3103 # Even if there was no new download, the merge itself only happens now
3104 info_dict['__real_download'] = True
3105 else:
3106 for file in downloaded:
3107 files_to_move[file] = None
3108 else:
3109 # Just a single file
3110 dl_filename = existing_video_file(full_filename, temp_filename)
3111 if dl_filename is None or dl_filename == temp_filename:
3112 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3113 # So we should try to resume the download
3114 success, real_download = self.dl(temp_filename, info_dict)
3115 info_dict['__real_download'] = real_download
3116 else:
3117 self.report_file_already_downloaded(dl_filename)
3118
3119 dl_filename = dl_filename or temp_filename
3120 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3121
3122 except network_exceptions as err:
3123 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3124 return
3125 except OSError as err:
3126 raise UnavailableVideoError(err)
3127 except (ContentTooShortError, ) as err:
3128 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3129 return
3130
3131 self._raise_pending_errors(info_dict)
3132 if success and full_filename != '-':
3133
3134 def fixup():
3135 do_fixup = True
3136 fixup_policy = self.params.get('fixup')
3137 vid = info_dict['id']
3138
3139 if fixup_policy in ('ignore', 'never'):
3140 return
3141 elif fixup_policy == 'warn':
3142 do_fixup = 'warn'
3143 elif fixup_policy != 'force':
3144 assert fixup_policy in ('detect_or_warn', None)
3145 if not info_dict.get('__real_download'):
3146 do_fixup = False
3147
3148 def ffmpeg_fixup(cndn, msg, cls):
3149 if not (do_fixup and cndn):
3150 return
3151 elif do_fixup == 'warn':
3152 self.report_warning(f'{vid}: {msg}')
3153 return
3154 pp = cls(self)
3155 if pp.available:
3156 info_dict['__postprocessors'].append(pp)
3157 else:
3158 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3159
3160 stretched_ratio = info_dict.get('stretched_ratio')
3161 ffmpeg_fixup(
3162 stretched_ratio not in (1, None),
3163 f'Non-uniform pixel ratio {stretched_ratio}',
3164 FFmpegFixupStretchedPP)
3165
3166 ffmpeg_fixup(
3167 (info_dict.get('requested_formats') is None
3168 and info_dict.get('container') == 'm4a_dash'
3169 and info_dict.get('ext') == 'm4a'),
3170 'writing DASH m4a. Only some players support this container',
3171 FFmpegFixupM4aPP)
3172
3173 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3174 downloader = downloader.FD_NAME if downloader else None
3175
3176 if info_dict.get('requested_formats') is None: # Not necessary if doing merger
3177 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3178 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3179 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3180 FFmpegFixupM3u8PP)
3181 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3182 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3183
3184 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3185 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3186
3187 fixup()
3188 try:
3189 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3190 except PostProcessingError as err:
3191 self.report_error('Postprocessing: %s' % str(err))
3192 return
3193 try:
3194 for ph in self._post_hooks:
3195 ph(info_dict['filepath'])
3196 except Exception as err:
3197 self.report_error('post hooks: %s' % str(err))
3198 return
3199 info_dict['__write_download_archive'] = True
3200
3201 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
3202 if self.params.get('force_write_download_archive'):
3203 info_dict['__write_download_archive'] = True
3204 check_max_downloads()
3205
3206 def __download_wrapper(self, func):
3207 @functools.wraps(func)
3208 def wrapper(*args, **kwargs):
3209 try:
3210 res = func(*args, **kwargs)
3211 except UnavailableVideoError as e:
3212 self.report_error(e)
3213 except DownloadCancelled as e:
3214 self.to_screen(f'[info] {e}')
3215 if not self.params.get('break_per_url'):
3216 raise
3217 else:
3218 if self.params.get('dump_single_json', False):
3219 self.post_extract(res)
3220 self.to_stdout(json.dumps(self.sanitize_info(res)))
3221 return wrapper
3222
3223 def download(self, url_list):
3224 """Download a given list of URLs."""
3225 url_list = variadic(url_list) # Passing a single URL is a common mistake
3226 outtmpl = self.params['outtmpl']['default']
3227 if (len(url_list) > 1
3228 and outtmpl != '-'
3229 and '%' not in outtmpl
3230 and self.params.get('max_downloads') != 1):
3231 raise SameFileError(outtmpl)
3232
3233 for url in url_list:
3234 self.__download_wrapper(self.extract_info)(
3235 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3236
3237 return self._download_retcode
3238
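# Illustrative sketch (assumed usage, hypothetical URL): the canonical
# embedding entry point defined above. YoutubeDL is a context manager, and a
# bare string is also accepted since download() wraps url_list with
# variadic():
#
#   with YoutubeDL({'format': 'best'}) as ydl:
#       retcode = ydl.download('https://example.com/watch?v=xyz')
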
3239 def download_with_info_file(self, info_filename):
3240 with contextlib.closing(fileinput.FileInput(
3241 [info_filename], mode='r',
3242 openhook=fileinput.hook_encoded('utf-8'))) as f:
3243 # FileInput doesn't have a read method, so we can't call json.load
3244 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3245 try:
3246 self.__download_wrapper(self.process_ie_result)(info, download=True)
3247 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3248 if not isinstance(e, EntryNotInPlaylist):
3249 self.to_stderr('\r')
3250 webpage_url = info.get('webpage_url')
3251 if webpage_url is not None:
3252 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3253 return self.download([webpage_url])
3254 else:
3255 raise
3256 return self._download_retcode
3257
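# Illustrative sketch (assumed usage, filenames hypothetical): the API
# counterpart of --load-info-json. A previously written .info.json can be
# fed back to resume processing without re-extraction:
#
#   ydl = YoutubeDL({'writeinfojson': True})
#   ydl.download(['https://example.com/watch?v=xyz'])
#   ydl.download_with_info_file('Some Title [xyz].info.json')
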
3258 @staticmethod
3259 def sanitize_info(info_dict, remove_private_keys=False):
3260 ''' Sanitize the infodict for converting to json '''
3261 if info_dict is None:
3262 return info_dict
3263 info_dict.setdefault('epoch', int(time.time()))
3264 info_dict.setdefault('_type', 'video')
3265
3266 if remove_private_keys:
3267 reject = lambda k, v: v is None or k.startswith('__') or k in {
3268 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3269 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
3270 }
3271 else:
3272 reject = lambda k, v: False
3273
3274 def filter_fn(obj):
3275 if isinstance(obj, dict):
3276 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3277 elif isinstance(obj, (list, tuple, set, LazyList)):
3278 return list(map(filter_fn, obj))
3279 elif obj is None or isinstance(obj, (str, int, float, bool)):
3280 return obj
3281 else:
3282 return repr(obj)
3283
3284 return filter_fn(info_dict)
3285
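# Illustrative sketch (assumed usage, given a YoutubeDL instance ydl):
# sanitize_info is the supported way to make an extracted info_dict
# JSON-serializable:
#
#   info = ydl.extract_info(url, download=False)
#   print(json.dumps(ydl.sanitize_info(info)))
#
# Pass remove_private_keys=True to additionally drop internal fields such as
# 'filepath', 'entries' and all '__'-prefixed keys.
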
3286 @staticmethod
3287 def filter_requested_info(info_dict, actually_filter=True):
3288 ''' Alias of sanitize_info for backward compatibility '''
3289 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3290
3291 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3292 for filename in set(filter(None, files_to_delete)):
3293 if msg:
3294 self.to_screen(msg % filename)
3295 try:
3296 os.remove(filename)
3297 except OSError:
3298 self.report_warning(f'Unable to delete file {filename}')
3299 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3300 del info['__files_to_move'][filename]
3301
3302 @staticmethod
3303 def post_extract(info_dict):
3304 def actual_post_extract(info_dict):
3305 if info_dict.get('_type') in ('playlist', 'multi_video'):
3306 for video_dict in info_dict.get('entries', {}):
3307 actual_post_extract(video_dict or {})
3308 return
3309
3310 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3311 info_dict.update(post_extractor())
3312
3313 actual_post_extract(info_dict or {})
3314
3315 def run_pp(self, pp, infodict):
3316 files_to_delete = []
3317 if '__files_to_move' not in infodict:
3318 infodict['__files_to_move'] = {}
3319 try:
3320 files_to_delete, infodict = pp.run(infodict)
3321 except PostProcessingError as e:
3322 # Must be True and not 'only_download'
3323 if self.params.get('ignoreerrors') is True:
3324 self.report_error(e)
3325 return infodict
3326 raise
3327
3328 if not files_to_delete:
3329 return infodict
3330 if self.params.get('keepvideo', False):
3331 for f in files_to_delete:
3332 infodict['__files_to_move'].setdefault(f, '')
3333 else:
3334 self._delete_downloaded_files(
3335 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3336 return infodict
3337
3338 def run_all_pps(self, key, info, *, additional_pps=None):
3339 self._forceprint(key, info)
3340 for pp in (additional_pps or []) + self._pps[key]:
3341 info = self.run_pp(pp, info)
3342 return info
3343
3344 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3345 info = dict(ie_info)
3346 info['__files_to_move'] = files_to_move or {}
3347 try:
3348 info = self.run_all_pps(key, info)
3349 except PostProcessingError as err:
3350 msg = f'Preprocessing: {err}'
3351 info.setdefault('__pending_error', msg)
3352 self.report_error(msg, is_error=False)
3353 return info, info.pop('__files_to_move', None)
3354
3355 def post_process(self, filename, info, files_to_move=None):
3356 """Run all the postprocessors on the given file."""
3357 info['filepath'] = filename
3358 info['__files_to_move'] = files_to_move or {}
3359 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3360 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3361 del info['__files_to_move']
3362 return self.run_all_pps('after_move', info)
3363
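# Illustrative sketch (assumed usage; the class below is hypothetical):
# registering a custom PP for one of the stages dispatched by run_all_pps.
# 'when' must be a POSTPROCESS_WHEN stage such as 'pre_process',
# 'before_dl', 'post_process' or 'after_move':
#
#   from yt_dlp.postprocessor.common import PostProcessor
#
#   class AnnouncePP(PostProcessor):
#       def run(self, info):
#           self.to_screen(f"Finished {info.get('title')}")
#           return [], info  # (files_to_delete, updated info)
#
#   ydl.add_post_processor(AnnouncePP(), when='after_move')
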
3364 def _make_archive_id(self, info_dict):
3365 video_id = info_dict.get('id')
3366 if not video_id:
3367 return
3368 # Future-proof against any change in case,
3369 # and keep backwards compatibility with prior versions
3370 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3371 if extractor is None:
3372 url = str_or_none(info_dict.get('url'))
3373 if not url:
3374 return
3375 # Try to find matching extractor for the URL and take its ie_key
3376 for ie_key, ie in self._ies.items():
3377 if ie.suitable(url):
3378 extractor = ie_key
3379 break
3380 else:
3381 return
3382 return f'{extractor.lower()} {video_id}'
3383
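# Illustrative sketch (hypothetical values): archive IDs produced by
# _make_archive_id are '<extractor> <video id>' with the extractor lowercased:
#
#   _make_archive_id({'id': 'abc123', 'extractor_key': 'Youtube'})
#   ->  'youtube abc123'
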
3384 def in_download_archive(self, info_dict):
3385 fn = self.params.get('download_archive')
3386 if fn is None:
3387 return False
3388
3389 vid_id = self._make_archive_id(info_dict)
3390 if not vid_id:
3391 return False # Incomplete video information
3392
3393 return vid_id in self.archive
3394
3395 def record_download_archive(self, info_dict):
3396 fn = self.params.get('download_archive')
3397 if fn is None:
3398 return
3399 vid_id = self._make_archive_id(info_dict)
3400 assert vid_id
3401 self.write_debug(f'Adding to archive: {vid_id}')
3402 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3403 archive_file.write(vid_id + '\n')
3404 self.archive.add(vid_id)
3405
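# Illustrative sketch (assumed usage, path hypothetical): the API form of
# --download-archive. Each completed download appends one archive ID per
# line, and in_download_archive() skips entries already listed:
#
#   ydl = YoutubeDL({'download_archive': 'archive.txt'})
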
3406 @staticmethod
3407 def format_resolution(format, default='unknown'):
3408 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3409 return 'audio only'
3410 if format.get('resolution') is not None:
3411 return format['resolution']
3412 if format.get('width') and format.get('height'):
3413 return '%dx%d' % (format['width'], format['height'])
3414 elif format.get('height'):
3415 return '%sp' % format['height']
3416 elif format.get('width'):
3417 return '%dx?' % format['width']
3418 return default
3419
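# Illustrative sketch (hypothetical inputs) of the fallback order above:
#
#   format_resolution({'vcodec': 'none', 'acodec': 'mp4a'})  ->  'audio only'
#   format_resolution({'width': 1920, 'height': 1080})       ->  '1920x1080'
#   format_resolution({'height': 720})                       ->  '720p'
#   format_resolution({})                                    ->  'unknown'
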
3420 def _list_format_headers(self, *headers):
3421 if self.params.get('listformats_table', True) is not False:
3422 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3423 return headers
3424
3425 def _format_note(self, fdict):
3426 res = ''
3427 if fdict.get('ext') in ['f4f', 'f4m']:
3428 res += '(unsupported)'
3429 if fdict.get('language'):
3430 if res:
3431 res += ' '
3432 res += '[%s]' % fdict['language']
3433 if fdict.get('format_note') is not None:
3434 if res:
3435 res += ' '
3436 res += fdict['format_note']
3437 if fdict.get('tbr') is not None:
3438 if res:
3439 res += ', '
3440 res += '%4dk' % fdict['tbr']
3441 if fdict.get('container') is not None:
3442 if res:
3443 res += ', '
3444 res += '%s container' % fdict['container']
3445 if (fdict.get('vcodec') is not None
3446 and fdict.get('vcodec') != 'none'):
3447 if res:
3448 res += ', '
3449 res += fdict['vcodec']
3450 if fdict.get('vbr') is not None:
3451 res += '@'
3452 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3453 res += 'video@'
3454 if fdict.get('vbr') is not None:
3455 res += '%4dk' % fdict['vbr']
3456 if fdict.get('fps') is not None:
3457 if res:
3458 res += ', '
3459 res += '%sfps' % fdict['fps']
3460 if fdict.get('acodec') is not None:
3461 if res:
3462 res += ', '
3463 if fdict['acodec'] == 'none':
3464 res += 'video only'
3465 else:
3466 res += '%-5s' % fdict['acodec']
3467 elif fdict.get('abr') is not None:
3468 if res:
3469 res += ', '
3470 res += 'audio'
3471 if fdict.get('abr') is not None:
3472 res += '@%3dk' % fdict['abr']
3473 if fdict.get('asr') is not None:
3474 res += ' (%5dHz)' % fdict['asr']
3475 if fdict.get('filesize') is not None:
3476 if res:
3477 res += ', '
3478 res += format_bytes(fdict['filesize'])
3479 elif fdict.get('filesize_approx') is not None:
3480 if res:
3481 res += ', '
3482 res += '~' + format_bytes(fdict['filesize_approx'])
3483 return res
3484
3485 def render_formats_table(self, info_dict):
3486 if not info_dict.get('formats') and not info_dict.get('url'):
3487 return None
3488
3489 formats = info_dict.get('formats', [info_dict])
3490 if self.params.get('listformats_table', True) is False:
3491 table = [
3492 [
3493 format_field(f, 'format_id'),
3494 format_field(f, 'ext'),
3495 self.format_resolution(f),
3496 self._format_note(f)
3497 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3498 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3499
3500 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3501 table = [
3502 [
3503 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3504 format_field(f, 'ext'),
3505 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3506 format_field(f, 'fps', '\t%d'),
3507 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3508 delim,
3509 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3510 format_field(f, 'tbr', '\t%dk'),
3511 shorten_protocol_name(f.get('protocol', '')),
3512 delim,
3513 format_field(f, 'vcodec', default='unknown').replace(
3514 'none', 'images' if f.get('acodec') == 'none'
3515 else self._format_out('audio only', self.Styles.SUPPRESS)),
3516 format_field(f, 'vbr', '\t%dk'),
3517 format_field(f, 'acodec', default='unknown').replace(
3518 'none', '' if f.get('vcodec') == 'none'
3519 else self._format_out('video only', self.Styles.SUPPRESS)),
3520 format_field(f, 'abr', '\t%dk'),
3521 format_field(f, 'asr', '\t%dHz'),
3522 join_nonempty(
3523 self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3524 format_field(f, 'language', '[%s]'),
3525 join_nonempty(format_field(f, 'format_note'),
3526 format_field(f, 'container', ignore=(None, f.get('ext'))),
3527 delim=', '),
3528 delim=' '),
3529 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3530 header_line = self._list_format_headers(
3531 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3532 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3533
3534 return render_table(
3535 header_line, table, hide_empty=True,
3536 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3537
3538 def render_thumbnails_table(self, info_dict):
3539 thumbnails = list(info_dict.get('thumbnails') or [])
3540 if not thumbnails:
3541 return None
3542 return render_table(
3543 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3544 [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3545
3546 def render_subtitles_table(self, video_id, subtitles):
3547 def _row(lang, formats):
3548 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3549 if len(set(names)) == 1:
3550 names = [] if names[0] == 'unknown' else names[:1]
3551 return [lang, ', '.join(names), ', '.join(exts)]
3552
3553 if not subtitles:
3554 return None
3555 return render_table(
3556 self._list_format_headers('Language', 'Name', 'Formats'),
3557 [_row(lang, formats) for lang, formats in subtitles.items()],
3558 hide_empty=True)
3559
3560 def __list_table(self, video_id, name, func, *args):
3561 table = func(*args)
3562 if not table:
3563 self.to_screen(f'{video_id} has no {name}')
3564 return
3565 self.to_screen(f'[info] Available {name} for {video_id}:')
3566 self.to_stdout(table)
3567
3568 def list_formats(self, info_dict):
3569 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3570
3571 def list_thumbnails(self, info_dict):
3572 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3573
3574 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3575 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3576
3577 def urlopen(self, req):
3578 """ Start an HTTP download """
3579 if isinstance(req, str):
3580 req = sanitized_Request(req)
3581 return self._opener.open(req, timeout=self._socket_timeout)
3582
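# Illustrative sketch (hypothetical URL, given a YoutubeDL instance ydl):
# urlopen() routes requests through the opener built in _setup_opener, so
# cookies, proxies and the custom handlers all apply:
#
#   data = ydl.urlopen('https://example.com/api').read()
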
3583 def print_debug_header(self):
3584 if not self.params.get('verbose'):
3585 return
3586
3587 # These imports can be slow, so import them only as needed
3588 from .extractor.extractors import _LAZY_LOADER
3589 from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
3590
3591 def get_encoding(stream):
3592 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3593 if not supports_terminal_sequences(stream):
3594 from .utils import WINDOWS_VT_MODE # Must be imported locally
3595 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3596 return ret
3597
3598 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
3599 locale.getpreferredencoding(),
3600 sys.getfilesystemencoding(),
3601 self.get_encoding(),
3602 ', '.join(
3603 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3604 if stream is not None and key != 'console')
3605 )
3606
3607 logger = self.params.get('logger')
3608 if logger:
3609 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3610 write_debug(encoding_str)
3611 else:
3612 write_string(f'[debug] {encoding_str}\n', encoding=None)
3613 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3614
3615 source = detect_variant()
3616 write_debug(join_nonempty(
3617 'yt-dlp version', __version__,
3618 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3619 '' if source == 'unknown' else f'({source})',
3620 delim=' '))
3621 if not _LAZY_LOADER:
3622 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3623 write_debug('Lazy loading extractors is forcibly disabled')
3624 else:
3625 write_debug('Lazy loading extractors is disabled')
3626 if plugin_extractors or plugin_postprocessors:
3627 write_debug('Plugins: %s' % [
3628 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3629 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3630 if self.params['compat_opts']:
3631 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
3632
3633 if source == 'source':
3634 try:
3635 stdout, _, _ = Popen.run(
3636 ['git', 'rev-parse', '--short', 'HEAD'],
3637 text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
3638 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
3639 if re.fullmatch('[0-9a-f]+', stdout.strip()):
3640 write_debug(f'Git HEAD: {stdout.strip()}')
3641 except Exception:
3642 with contextlib.suppress(Exception):
3643 sys.exc_clear()
3644
3645 def python_implementation():
3646 impl_name = platform.python_implementation()
3647 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3648 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3649 return impl_name
3650
3651 write_debug('Python version %s (%s %s) - %s' % (
3652 platform.python_version(),
3653 python_implementation(),
3654 platform.architecture()[0],
3655 platform_name()))
3656
3657 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3658 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3659 if ffmpeg_features:
3660 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3661
3662 exe_versions['rtmpdump'] = rtmpdump_version()
3663 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3664 exe_str = ', '.join(
3665 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3666 ) or 'none'
3667 write_debug('exe versions: %s' % exe_str)
3668
3669 from .compat.compat_utils import get_package_info
3670 from .dependencies import available_dependencies
3671
3672 write_debug('Optional libraries: %s' % (', '.join(sorted({
3673 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
3674 })) or 'none'))
3675
3676 self._setup_opener()
3677 proxy_map = {}
3678 for handler in self._opener.handlers:
3679 if hasattr(handler, 'proxies'):
3680 proxy_map.update(handler.proxies)
3681 write_debug(f'Proxy map: {proxy_map}')
3682
3683 # Not implemented
3684 if False and self.params.get('call_home'):
3685 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
3686 write_debug('Public IP address: %s' % ipaddr)
3687 latest_version = self.urlopen(
3688 'https://yt-dl.org/latest/version').read().decode()
3689 if version_tuple(latest_version) > version_tuple(__version__):
3690 self.report_warning(
3691 'You are using an outdated version (newest version: %s)! '
3692 'See https://yt-dl.org/update if you need help updating.' %
3693 latest_version)
3694
3695 def _setup_opener(self):
3696 if hasattr(self, '_opener'):
3697 return
3698 timeout_val = self.params.get('socket_timeout')
3699 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3700
3701 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3702 opts_cookiefile = self.params.get('cookiefile')
3703 opts_proxy = self.params.get('proxy')
3704
3705 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3706
3707 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3708 if opts_proxy is not None:
3709 if opts_proxy == '':
3710 proxies = {}
3711 else:
3712 proxies = {'http': opts_proxy, 'https': opts_proxy}
3713 else:
3714 proxies = compat_urllib_request.getproxies()
3715 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3716 if 'http' in proxies and 'https' not in proxies:
3717 proxies['https'] = proxies['http']
3718 proxy_handler = PerRequestProxyHandler(proxies)
3719
3720 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3721 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3722 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3723 redirect_handler = YoutubeDLRedirectHandler()
3724 data_handler = urllib.request.DataHandler()
3725
3726 # When we pass our own FileHandler instance, build_opener won't add the
3727 # default FileHandler, which allows us to disable the file protocol
3728 # since it can be used for malicious purposes (see
3729 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3730 file_handler = compat_urllib_request.FileHandler()
3731
3732 def file_open(*args, **kwargs):
3733 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3734 file_handler.file_open = file_open
3735
3736 opener = compat_urllib_request.build_opener(
3737 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3738
3739 # Delete the default user-agent header, which would otherwise apply in
3740 # cases where our custom HTTP handler doesn't come into play
3741 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3742 opener.addheaders = []
3743 self._opener = opener
3744
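# Illustrative sketch (assumed values): the params consumed by
# _setup_opener. An empty 'proxy' string forces a direct connection,
# bypassing any environment proxies:
#
#   ydl = YoutubeDL({
#       'proxy': 'socks5://127.0.0.1:1080',
#       'cookiefile': 'cookies.txt',
#       'socket_timeout': 10,
#   })
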
3745 def encode(self, s):
3746 if isinstance(s, bytes):
3747 return s # Already encoded
3748
3749 try:
3750 return s.encode(self.get_encoding())
3751 except UnicodeEncodeError as err:
3752 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3753 raise
3754
3755 def get_encoding(self):
3756 encoding = self.params.get('encoding')
3757 if encoding is None:
3758 encoding = preferredencoding()
3759 return encoding
3760
3761 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3762 ''' Write infojson and return True = written, 'exists' = already exists, False = skip, None = error '''
3763 if overwrite is None:
3764 overwrite = self.params.get('overwrites', True)
3765 if not self.params.get('writeinfojson'):
3766 return False
3767 elif not infofn:
3768 self.write_debug(f'Skipping writing {label} infojson')
3769 return False
3770 elif not self._ensure_dir_exists(infofn):
3771 return None
3772 elif not overwrite and os.path.exists(infofn):
3773 self.to_screen(f'[info] {label.title()} metadata is already present')
3774 return 'exists'
3775
3776 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3777 try:
3778 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3779 return True
3780 except OSError:
3781 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3782 return None
3783
3784 def _write_description(self, label, ie_result, descfn):
3785 ''' Write description and return True = written, False = skip, None = error '''
3786 if not self.params.get('writedescription'):
3787 return False
3788 elif not descfn:
3789 self.write_debug(f'Skipping writing {label} description')
3790 return False
3791 elif not self._ensure_dir_exists(descfn):
3792 return None
3793 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3794 self.to_screen(f'[info] {label.title()} description is already present')
3795 elif ie_result.get('description') is None:
3796 self.report_warning(f'There\'s no {label} description to write')
3797 return False
3798 else:
3799 try:
3800 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3801 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3802 descfile.write(ie_result['description'])
3803 except OSError:
3804 self.report_error(f'Cannot write {label} description file {descfn}')
3805 return None
3806 return True
3807
3808 def _write_subtitles(self, info_dict, filename):
3809 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error '''
3810 ret = []
3811 subtitles = info_dict.get('requested_subtitles')
3812 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3813 # Subtitle download errors are already handled in the relevant IE,
3814 # so extraction silently continues for IEs that do not support subtitles
3815 return ret
3816
3817 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3818 if not sub_filename_base:
3819 self.to_screen('[info] Skipping writing video subtitles')
3820 return ret
3821 for sub_lang, sub_info in subtitles.items():
3822 sub_format = sub_info['ext']
3823 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3824 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3825 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3826 if existing_sub:
3827 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3828 sub_info['filepath'] = existing_sub
3829 ret.append((existing_sub, sub_filename_final))
3830 continue
3831
3832 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3833 if sub_info.get('data') is not None:
3834 try:
3835 # Use newline='' to prevent conversion of newline characters
3836 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3837 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3838 subfile.write(sub_info['data'])
3839 sub_info['filepath'] = sub_filename
3840 ret.append((sub_filename, sub_filename_final))
3841 continue
3842 except OSError:
3843 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3844 return None
3845
3846 try:
3847 sub_copy = sub_info.copy()
3848 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3849 self.dl(sub_filename, sub_copy, subtitle=True)
3850 sub_info['filepath'] = sub_filename
3851 ret.append((sub_filename, sub_filename_final))
3852 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3853 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
3854 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
3855 if not self.params.get('ignoreerrors'):
3856 self.report_error(msg)
3857 raise DownloadError(msg)
3858 self.report_warning(msg)
3859 return ret
3860
3861 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3862 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3863 write_all = self.params.get('write_all_thumbnails', False)
3864 thumbnails, ret = [], []
3865 if write_all or self.params.get('writethumbnail', False):
3866 thumbnails = info_dict.get('thumbnails') or []
3867 multiple = write_all and len(thumbnails) > 1
3868
3869 if thumb_filename_base is None:
3870 thumb_filename_base = filename
3871 if thumbnails and not thumb_filename_base:
3872 self.write_debug(f'Skipping writing {label} thumbnail')
3873 return ret
3874
3875 for idx, t in list(enumerate(thumbnails))[::-1]:
3876 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3877 thumb_display_id = f'{label} thumbnail {t["id"]}'
3878 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3879 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3880
3881 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3882 if existing_thumb:
3883 self.to_screen('[info] %s is already present' % (
3884 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3885 t['filepath'] = existing_thumb
3886 ret.append((existing_thumb, thumb_filename_final))
3887 else:
3888 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3889 try:
3890 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3891 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3892 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3893 shutil.copyfileobj(uf, thumbf)
3894 ret.append((thumb_filename, thumb_filename_final))
3895 t['filepath'] = thumb_filename
3896 except network_exceptions as err:
3897 thumbnails.pop(idx)
3898 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3899 if ret and not write_all:
3900 break
3901 return ret
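
# Illustrative sketch (assumed usage): thumbnail writing is driven by these
# params; 'write_all_thumbnails' saves every available thumbnail rather than
# just the best one:
#
#   ydl = YoutubeDL({'writethumbnail': True})         # best thumbnail only
#   ydl = YoutubeDL({'write_all_thumbnails': True})   # all thumbnails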