yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 import collections
   3 import contextlib
   4 import datetime
   5 import errno
   6 import fileinput
   7 import functools
   8 import io
   9 import itertools
  10 import json
  11 import locale
  12 import operator
  13 import os
  14 import platform
  15 import random
  16 import re
  17 import shutil
  18 import subprocess
  19 import sys
  20 import tempfile
  21 import time
  22 import tokenize
  23 import traceback
  24 import unicodedata
  25 import urllib.request
  26 from string import ascii_letters
  27
  28 from .cache import Cache
  29 from .compat import HAS_LEGACY as compat_has_legacy
  30 from .compat import compat_os_name, compat_shlex_quote, compat_str
  31 from .cookies import load_cookies
  32 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
  33 from .downloader.rtmp import rtmpdump_version
  34 from .extractor import gen_extractor_classes, get_info_extractor
  35 from .extractor.openload import PhantomJSwrapper
  36 from .minicurses import format_text
  37 from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
  38 from .postprocessor import (
  39     EmbedThumbnailPP,
  40     FFmpegFixupDuplicateMoovPP,
  41     FFmpegFixupDurationPP,
  42     FFmpegFixupM3u8PP,
  43     FFmpegFixupM4aPP,
  44     FFmpegFixupStretchedPP,
  45     FFmpegFixupTimestampPP,
  46     FFmpegMergerPP,
  47     FFmpegPostProcessor,
  48     MoveFilesAfterDownloadPP,
  49     get_postprocessor,
  50 )
  51 from .update import detect_variant
  52 from .utils import (
  53     DEFAULT_OUTTMPL,
  54     IDENTITY,
  55     LINK_TEMPLATES,
  56     NO_DEFAULT,
  57     NUMBER_RE,
  58     OUTTMPL_TYPES,
  59     POSTPROCESS_WHEN,
  60     STR_FORMAT_RE_TMPL,
  61     STR_FORMAT_TYPES,
  62     ContentTooShortError,
  63     DateRange,
  64     DownloadCancelled,
  65     DownloadError,
  66     EntryNotInPlaylist,
  67     ExistingVideoReached,
  68     ExtractorError,
  69     GeoRestrictedError,
  70     HEADRequest,
  71     ISO3166Utils,
  72     LazyList,
  73     MaxDownloadsReached,
  74     Namespace,
  75     PagedList,
  76     PerRequestProxyHandler,
  77     PlaylistEntries,
  78     Popen,
  79     PostProcessingError,
  80     ReExtractInfo,
  81     RejectedVideoReached,
  82     SameFileError,
  83     UnavailableVideoError,
  84     YoutubeDLCookieProcessor,
  85     YoutubeDLHandler,
  86     YoutubeDLRedirectHandler,
  87     age_restricted,
  88     args_to_str,
  89     date_from_str,
  90     determine_ext,
  91     determine_protocol,
  92     encode_compat_str,
  93     encodeFilename,
  94     error_to_compat_str,
  95     expand_path,
  96     filter_dict,
  97     float_or_none,
  98     format_bytes,
  99     format_decimal_suffix,
 100     format_field,
 101     formatSeconds,
 102     get_domain,
 103     int_or_none,
 104     iri_to_uri,
 105     join_nonempty,
 106     locked_file,
 107     make_dir,
 108     make_HTTPS_handler,
 109     merge_headers,
 110     network_exceptions,
 111     number_of_digits,
 112     orderedSet,
 113     parse_filesize,
 114     platform_name,
 115     preferredencoding,
 116     prepend_extension,
 117     register_socks_protocols,
 118     remove_terminal_sequences,
 119     render_table,
 120     replace_extension,
 121     sanitize_filename,
 122     sanitize_path,
 123     sanitize_url,
 124     sanitized_Request,
 125     std_headers,
 126     str_or_none,
 127     strftime_or_none,
 128     subtitles_filename,
 129     supports_terminal_sequences,
 130     timetuple_from_msec,
 131     to_high_limit_path,
 132     traverse_obj,
 133     try_get,
 134     url_basename,
 135     variadic,
 136     version_tuple,
 137     windows_enable_vt_mode,
 138     write_json_file,
 139     write_string,
 140 )
 141 from .version import RELEASE_GIT_HEAD, __version__
 142
 143 if compat_os_name == 'nt':
 144     import ctypes
 145
 146
 147 class YoutubeDL:
 148     """YoutubeDL class.
 149
 150     YoutubeDL objects are the ones responsible for downloading the
 151     actual video file and writing it to disk if the user has requested
 152     it, among some other tasks. In most cases there should be one per
 153     program. As, given a video URL, the downloader doesn't know how to
 154     extract all the needed information, task that InfoExtractors do, it
 155     has to pass the URL to one of them.
 156
 157     For this, YoutubeDL objects have a method that allows
 158     InfoExtractors to be registered in a given order. When it is passed
 159     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 160     finds that reports being able to handle it. The InfoExtractor extracts
 161     all the information about the video or videos the URL refers to, and
 162     YoutubeDL processes the extracted information, possibly using a File
 163     Downloader to download the video.
 164
 165     YoutubeDL objects accept a lot of parameters. In order not to saturate
 166     the object constructor with arguments, it receives a dictionary of
 167     options instead. These options are available through the params
 168     attribute for the InfoExtractors to use. The YoutubeDL also
 169     registers itself as the downloader in charge for the InfoExtractors
 170     that are added to it, so this is a "mutual registration".
 171
 172     Available options:
 173
 174     username:          Username for authentication purposes.
 175     password:          Password for authentication purposes.
 176     videopassword:     Password for accessing a video.
 177     ap_mso:            Adobe Pass multiple-system operator identifier.
 178     ap_username:       Multiple-system operator account username.
 179     ap_password:       Multiple-system operator account password.
 180     usenetrc:          Use netrc for authentication instead.
 181     verbose:           Print additional info to stdout.
 182     quiet:             Do not print messages to stdout.
 183     no_warnings:       Do not print out anything for warnings.
 184     forceprint:        A dict with keys WHEN mapped to a list of templates to
 185                        print to stdout. The allowed keys are video or any of the
 186                        items in utils.POSTPROCESS_WHEN.
 187                        For compatibility, a single list is also accepted
 188     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 189                        a list of tuples with (template, filename)
 190     forcejson:         Force printing info_dict as JSON.
 191     dump_single_json:  Force printing the info_dict of the whole playlist
 192                        (or video) as a single JSON line.
 193     force_write_download_archive: Force writing download archive regardless
 194                        of 'skip_download' or 'simulate'.
 195     simulate:          Do not download the video files. If unset (or None),
 196                        simulate only if listsubtitles, listformats or list_thumbnails is used
 197     format:            Video format code. See "FORMAT SELECTION" for more details.
 198                        You can also pass a function. The function takes 'ctx' as
 199                        argument and returns the formats to download.
 200                        See "build_format_selector" for an implementation
 201     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 202     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 203                        extracting metadata even if the video is not actually
 204                        available for download (experimental)
 205     format_sort:       A list of fields by which to sort the video formats.
 206                        See "Sorting Formats" for more details.
 207     format_sort_force: Force the given format_sort. See "Sorting Formats"
 208                        for more details.
 209     prefer_free_formats: Whether to prefer video formats with free containers
 210                        over non-free ones of same quality.
 211     allow_multiple_video_streams:   Allow multiple video streams to be merged
 212                        into a single file
 213     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 214                        into a single file
 215     check_formats:     Whether to test if the formats are downloadable.
 216                        Can be True (check all), False (check none),
 217                        'selected' (check selected formats),
 218                        or None (check only if requested by extractor)
 219     paths:             Dictionary of output paths. The allowed keys are 'home',
 220                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 221     outtmpl:           Dictionary of templates for output names. Allowed keys
 222                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 223                        For compatibility with youtube-dl, a single string can also be used
 224     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 225     restrictfilenames: Do not allow "&" and spaces in file names
 226     trim_file_name:    Limit length of filename (extension excluded)
 227     windowsfilenames:  Force the filenames to be windows compatible
 228     ignoreerrors:      Do not stop on download/postprocessing errors.
 229                        Can be 'only_download' to ignore only download errors.
 230                        Default is 'only_download' for CLI, but False for API
 231     skip_playlist_after_errors: Number of allowed failures until the rest of
 232                        the playlist is skipped
 233     force_generic_extractor: Force downloader to use the generic extractor
 234     overwrites:        Overwrite all video and metadata files if True,
 235                        overwrite only non-video files if None
 236                        and don't overwrite any file if False
 237                        For compatibility with youtube-dl,
 238                        "nooverwrites" may also be used instead
 239     playlist_items:    Specific indices of playlist to download.
 240     playlistrandom:    Download playlist items in random order.
 241     lazy_playlist:     Process playlist entries as they are received.
 242     matchtitle:        Download only matching titles.
 243     rejecttitle:       Reject downloads for matching titles.
 244     logger:            Log messages to a logging.Logger instance.
 245     logtostderr:       Log messages to stderr instead of stdout.
 246     consoletitle:      Display progress in console window's titlebar.
 247     writedescription:  Write the video description to a .description file
 248     writeinfojson:     Write the video metadata to a .info.json file
 249     clean_infojson:    Remove private fields from the infojson
 250     getcomments:       Extract video comments. This will not be written to disk
 251                        unless writeinfojson is also given
 252     writeannotations:  Write the video annotations to a .annotations.xml file
 253     writethumbnail:    Write the thumbnail image to a file
 254     allow_playlist_files: Whether to write playlists' description, infojson etc
 255                        also to disk when using the 'write*' options
 256     write_all_thumbnails:  Write all thumbnail formats to files
 257     writelink:         Write an internet shortcut file, depending on the
 258                        current platform (.url/.webloc/.desktop)
 259     writeurllink:      Write a Windows internet shortcut file (.url)
 260     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 261     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 262     writesubtitles:    Write the video subtitles to a file
 263     writeautomaticsub: Write the automatically generated subtitles to a file
 264     listsubtitles:     Lists all available subtitles for the video
 265     subtitlesformat:   The format code for subtitles
 266     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 267                        The list may contain "all" to refer to all the available
 268                        subtitles. The language can be prefixed with a "-" to
 269                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 270     keepvideo:         Keep the video file after post-processing
 271     daterange:         A DateRange object, download only if the upload_date is in the range.
 272     skip_download:     Skip the actual download of the video file
 273     cachedir:          Location of the cache files in the filesystem.
 274                        False to disable filesystem cache.
 275     noplaylist:        Download single video instead of a playlist if in doubt.
 276     age_limit:         An integer representing the user's age in years.
 277                        Unsuitable videos for the given age are skipped.
 278     min_views:         An integer representing the minimum view count the video
 279                        must have in order to not be skipped.
 280                        Videos without view count information are always
 281                        downloaded. None for no limit.
 282     max_views:         An integer representing the maximum view count.
 283                        Videos that are more popular than that are not
 284                        downloaded.
 285                        Videos without view count information are always
 286                        downloaded. None for no limit.
 287     download_archive:  File name of a file where all downloads are recorded.
 288                        Videos already present in the file are not downloaded
 289                        again.
 290     break_on_existing: Stop the download process after attempting to download a
 291                        file that is in the archive.
 292     break_on_reject:   Stop the download process when encountering a video that
 293                        has been filtered out.
 294     break_per_url:     Whether break_on_reject and break_on_existing
 295                        should act on each input URL as opposed to for the entire queue
 296     cookiefile:        File name or text stream from where cookies should be read and dumped to
 297     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 298                        name/path from where cookies are loaded, and the name of the
 299                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 300     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 301                        support RFC 5746 secure renegotiation
 302     nocheckcertificate:  Do not verify SSL certificates
 303     client_certificate:  Path to client certificate file in PEM format. May include the private key
 304     client_certificate_key:  Path to private key file for client certificate
 305     client_certificate_password:  Password for client certificate private key, if encrypted.
 306                         If not provided and the key is encrypted, yt-dlp will ask interactively
 307     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 308                        At the moment, this is only supported by YouTube.
 309     http_headers:      A dictionary of custom headers to be used for all requests
 310     proxy:             URL of the proxy server to use
 311     geo_verification_proxy:  URL of the proxy to use for IP address verification
 312                        on geo-restricted sites.
 313     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 314     bidi_workaround:   Work around buggy terminals without bidirectional text
 315                        support, using fribidi
 316     debug_printtraffic:Print out sent and received HTTP traffic
 317     default_search:    Prepend this string if an input url is not valid.
 318                        'auto' for elaborate guessing
 319     encoding:          Use this encoding instead of the system-specified.
 320     extract_flat:      Do not resolve URLs, return the immediate result.
 321                        Pass in 'in_playlist' to only show this behavior for
 322                        playlist items.
 323     wait_for_video:    If given, wait for scheduled streams to become available.
 324                        The value should be a tuple containing the range
 325                        (min_secs, max_secs) to wait between retries
 326     postprocessors:    A list of dictionaries, each with an entry
 327                        * key:  The name of the postprocessor. See
 328                                yt_dlp/postprocessor/__init__.py for a list.
 329                        * when: When to run the postprocessor. Allowed values are
 330                                the entries of utils.POSTPROCESS_WHEN
 331                                Assumed to be 'post_process' if not given
 332     progress_hooks:    A list of functions that get called on download
 333                        progress, with a dictionary with the entries
 334                        * status: One of "downloading", "error", or "finished".
 335                                  Check this first and ignore unknown values.
 336                        * info_dict: The extracted info_dict
 337
 338                        If status is one of "downloading" or "finished", the
 339                        following properties may also be present:
 340                        * filename: The final filename (always present)
 341                        * tmpfilename: The filename we're currently writing to
 342                        * downloaded_bytes: Bytes on disk
 343                        * total_bytes: Size of the whole file, None if unknown
 344                        * total_bytes_estimate: Guess of the eventual file size,
 345                                                None if unavailable.
 346                        * elapsed: The number of seconds since download started.
 347                        * eta: The estimated time in seconds, None if unknown
 348                        * speed: The download speed in bytes/second, None if
 349                                 unknown
 350                        * fragment_index: The counter of the currently
 351                                          downloaded video fragment.
 352                        * fragment_count: The number of fragments (= individual
 353                                          files that will be merged)
 354
 355                        Progress hooks are guaranteed to be called at least once
 356                        (with status "finished") if the download is successful.
 357     postprocessor_hooks:  A list of functions that get called on postprocessing
 358                        progress, with a dictionary with the entries
 359                        * status: One of "started", "processing", or "finished".
 360                                  Check this first and ignore unknown values.
 361                        * postprocessor: Name of the postprocessor
 362                        * info_dict: The extracted info_dict
 363
 364                        Progress hooks are guaranteed to be called at least twice
 365                        (with status "started" and "finished") if the processing is successful.
 366     merge_output_format: Extension to use when merging formats.
 367     final_ext:         Expected final extension; used to detect when the file was
 368                        already downloaded and converted
 369     fixup:             Automatically correct known faults of the file.
 370                        One of:
 371                        - "never": do nothing
 372                        - "warn": only emit a warning
 373                        - "detect_or_warn": check whether we can do anything
 374                                            about it, warn otherwise (default)
 375     source_address:    Client-side IP address to bind to.
 376     sleep_interval_requests: Number of seconds to sleep between requests
 377                        during extraction
 378     sleep_interval:    Number of seconds to sleep before each download when
 379                        used alone or a lower bound of a range for randomized
 380                        sleep before each download (minimum possible number
 381                        of seconds to sleep) when used along with
 382                        max_sleep_interval.
 383     max_sleep_interval:Upper bound of a range for randomized sleep before each
 384                        download (maximum possible number of seconds to sleep).
 385                        Must only be used along with sleep_interval.
 386                        Actual sleep time will be a random float from range
 387                        [sleep_interval; max_sleep_interval].
 388     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 389     listformats:       Print an overview of available video formats and exit.
 390     list_thumbnails:   Print a table of all thumbnails and exit.
 391     match_filter:      A function that gets called for every video with the signature
 392                        (info_dict, *, incomplete: bool) -> Optional[str]
 393                        For backward compatibility with youtube-dl, the signature
 394                        (info_dict) -> Optional[str] is also allowed.
 395                        - If it returns a message, the video is ignored.
 396                        - If it returns None, the video is downloaded.
 397                        - If it returns utils.NO_DEFAULT, the user is interactively
 398                          asked whether to download the video.
 399                        match_filter_func in utils.py is one example for this.
 400     no_color:          Do not emit color codes in output.
 401     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 402                        HTTP header
 403     geo_bypass_country:
 404                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 405                        explicit geographic restriction bypassing via faking
 406                        X-Forwarded-For HTTP header
 407     geo_bypass_ip_block:
 408                        IP range in CIDR notation that will be used similarly to
 409                        geo_bypass_country
 410     external_downloader: A dictionary of protocol keys and the executable of the
 411                        external downloader to use for it. The allowed protocols
 412                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 413                        Set the value to 'native' to use the native downloader
 414     compat_opts:       Compatibility options. See "Differences in default behavior".
 415                        The following options do not work when used through the API:
 416                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 417                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 418                        Refer to __init__.py for their implementation
 419     progress_template: Dictionary of templates for progress outputs.
 420                        Allowed keys are 'download', 'postprocess',
 421                        'download-title' (console title) and 'postprocess-title'.
 422                        The template is mapped on a dictionary with keys 'progress' and 'info'
 423     retry_sleep_functions: Dictionary of functions that take the number of attempts
 424                        as argument and returns the time to sleep in seconds.
 425                        Allowed keys are 'http', 'fragment', 'file_access'
 426     download_ranges:   A function that gets called for every video with the signature
 427                        (info_dict, *, ydl) -> Iterable[Section].
 428                        Only the returned sections will be downloaded. Each Section contains:
 429                        * start_time: Start time of the section in seconds
 430                        * end_time: End time of the section in seconds
 431                        * title: Section title (Optional)
 432                        * index: Section number (Optional)
 433
 434     The following parameters are not used by YoutubeDL itself, they are used by
 435     the downloader (see yt_dlp/downloader/common.py):
 436     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 437     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 438     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 439     external_downloader_args, concurrent_fragment_downloads.
 440
 441     The following options are used by the post processors:
 442     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 443                        to the binary or its containing directory.
 444     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 445                        and a list of additional command-line arguments for the
 446                        postprocessor/executable. The dict can also have "PP+EXE" keys
 447                        which are used when the given exe is used by the given PP.
 448                        Use 'default' as the name for arguments to be passed to all PP
 449                        For compatibility with youtube-dl, a single list of args
 450                        can also be used
 451
 452     The following options are used by the extractors:
 453     extractor_retries: Number of times to retry for known errors
 454     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 455     hls_split_discontinuity: Split HLS playlists to different formats at
 456                        discontinuities such as ad breaks (default: False)
 457     extractor_args:    A dictionary of arguments to be passed to the extractors.
 458                        See "EXTRACTOR ARGUMENTS" for details.
 459                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 460     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 461
 462     The following options are deprecated and may be removed in the future:
 463
 464     playliststart:     - Use playlist_items
 465                        Playlist item to start at.
 466     playlistend:       - Use playlist_items
 467                        Playlist item to end at.
 468     playlistreverse:   - Use playlist_items
 469                        Download playlist items in reverse order.
 470     forceurl:          - Use forceprint
 471                        Force printing final URL.
 472     forcetitle:        - Use forceprint
 473                        Force printing title.
 474     forceid:           - Use forceprint
 475                        Force printing ID.
 476     forcethumbnail:    - Use forceprint
 477                        Force printing thumbnail URL.
 478     forcedescription:  - Use forceprint
 479                        Force printing description.
 480     forcefilename:     - Use forceprint
 481                        Force printing final filename.
 482     forceduration:     - Use forceprint
 483                        Force printing duration.
 484     allsubtitles:      - Use subtitleslangs = ['all']
 485                        Downloads all the subtitles of the video
 486                        (requires writesubtitles or writeautomaticsub)
 487     include_ads:       - Doesn't work
 488                        Download ads as well
 489     call_home:         - Not implemented
 490                        Boolean, true iff we are allowed to contact the
 491                        yt-dlp servers for debugging.
 492     post_hooks:        - Register a custom postprocessor
 493                        A list of functions that get called as the final step
 494                        for each video file, after all postprocessors have been
 495                        called. The filename will be passed as the only argument.
 496     hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
 497                        Use the native HLS downloader instead of ffmpeg/avconv
 498                        if True, otherwise use ffmpeg/avconv if False, otherwise
 499                        use downloader suggested by extractor if None.
 500     prefer_ffmpeg:     - avconv support is deprecated
 501                        If False, use avconv instead of ffmpeg if both are available,
 502                        otherwise prefer ffmpeg.
 503     youtube_include_dash_manifest: - Use extractor_args
 504                        If True (default), DASH manifests and related
 505                        data will be downloaded and processed by extractor.
 506                        You can reduce network I/O by disabling it if you don't
 507                        care about DASH. (only for youtube)
 508     youtube_include_hls_manifest: - Use extractor_args
 509                        If True (default), HLS manifests and related
 510                        data will be downloaded and processed by extractor.
 511                        You can reduce network I/O by disabling it if you don't
 512                        care about HLS. (only for youtube)
 513     """
 514
 515     _NUMERIC_FIELDS = {
 516         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 517         'timestamp', 'release_timestamp',
 518         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 519         'average_rating', 'comment_count', 'age_limit',
 520         'start_time', 'end_time',
 521         'chapter_number', 'season_number', 'episode_number',
 522         'track_number', 'disc_number', 'release_year',
 523     }
 524
 525     _format_fields = {
 526         # NB: Keep in sync with the docstring of extractor/common.py
 527         'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
 528         'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
 529         'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
 530         'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
 531         'preference', 'language', 'language_preference', 'quality', 'source_preference',
 532         'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
 533         'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
 534     }
 535     _format_selection_exts = {
 536         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 537         'video': {'mp4', 'flv', 'webm', '3gp'},
 538         'storyboards': {'mhtml'},
 539     }
 540
 541     def __init__(self, params=None, auto_init=True):
 542         """Create a YoutubeDL object with the given options.
 543         @param auto_init    Whether to load the default extractors and print header (if verbose).
 544                             Set to 'no_verbose_header' to not print the header
 545         """
 546         if params is None:
 547             params = {}
 548         self.params = params
 549         self._ies = {}
 550         self._ies_instances = {}
 551         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 552         self._printed_messages = set()
 553         self._first_webpage_request = True
 554         self._post_hooks = []
 555         self._progress_hooks = []
 556         self._postprocessor_hooks = []
 557         self._download_retcode = 0
 558         self._num_downloads = 0
 559         self._num_videos = 0
 560         self._playlist_level = 0
 561         self._playlist_urls = set()
 562         self.cache = Cache(self)
 563
 564         windows_enable_vt_mode()
 565         stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
 566         self._out_files = Namespace(
 567             out=stdout,
 568             error=sys.stderr,
 569             screen=sys.stderr if self.params.get('quiet') else stdout,
 570             console=None if compat_os_name == 'nt' else next(
 571                 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
 572         )
 573         self._allow_colors = Namespace(**{
 574             type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
 575             for type_, stream in self._out_files.items_ if type_ != 'console'
 576         })
 577
 578         MIN_SUPPORTED, MIN_RECOMMENDED = (3, 6), (3, 7)
 579         current_version = sys.version_info[:2]
 580         if current_version < MIN_RECOMMENDED:
 581             msg = 'Support for Python version %d.%d has been deprecated and will break in future versions of yt-dlp'
 582             if current_version < MIN_SUPPORTED:
 583                 msg = 'Python version %d.%d is no longer supported'
 584             self.deprecation_warning(
 585                 f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))
 586
 587         if self.params.get('allow_unplayable_formats'):
 588             self.report_warning(
 589                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 590                 'This is a developer option intended for debugging. \n'
 591                 '         If you experience any issues while using this option, '
 592                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 593
 594         def check_deprecated(param, option, suggestion):
 595             if self.params.get(param) is not None:
 596                 self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
 597                 return True
 598             return False
 599
 600         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 601             if self.params.get('geo_verification_proxy') is None:
 602                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 603
 604         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 605         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 606         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 607
 608         for msg in self.params.get('_warnings', []):
 609             self.report_warning(msg)
 610         for msg in self.params.get('_deprecation_warnings', []):
 611             self.deprecation_warning(msg)
 612
 613         self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
 614         if not compat_has_legacy:
 615             self.params['compat_opts'].add('no-compat-legacy')
 616         if 'list-formats' in self.params['compat_opts']:
 617             self.params['listformats_table'] = False
 618
 619         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 620             # nooverwrites was unnecessarily changed to overwrites
 621             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 622             # This ensures compatibility with both keys
 623             self.params['overwrites'] = not self.params['nooverwrites']
 624         elif self.params.get('overwrites') is None:
 625             self.params.pop('overwrites', None)
 626         else:
 627             self.params['nooverwrites'] = not self.params['overwrites']
 628
 629         self.params.setdefault('forceprint', {})
 630         self.params.setdefault('print_to_file', {})
 631
 632         # Compatibility with older syntax
 633         if not isinstance(params['forceprint'], dict):
 634             self.params['forceprint'] = {'video': params['forceprint']}
 635
 636         if self.params.get('bidi_workaround', False):
 637             try:
 638                 import pty
 639                 master, slave = pty.openpty()
 640                 width = shutil.get_terminal_size().columns
 641                 width_args = [] if width is None else ['-w', str(width)]
 642                 sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
 643                 try:
 644                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 645                 except OSError:
 646                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 647                 self._output_channel = os.fdopen(master, 'rb')
 648             except OSError as ose:
 649                 if ose.errno == errno.ENOENT:
 650                     self.report_warning(
 651                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 652                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 653                 else:
 654                     raise
 655
 656         if auto_init:
 657             if auto_init != 'no_verbose_header':
 658                 self.print_debug_header()
 659             self.add_default_info_extractors()
 660
 661         if (sys.platform != 'win32'
 662                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 663                 and not self.params.get('restrictfilenames', False)):
 664             # Unicode filesystem API will throw errors (#1474, #13027)
 665             self.report_warning(
 666                 'Assuming --restrict-filenames since file system encoding '
 667                 'cannot encode all characters. '
 668                 'Set the LC_ALL environment variable to fix this.')
 669             self.params['restrictfilenames'] = True
 670
 671         self._parse_outtmpl()
 672
 673         # Creating format selector here allows us to catch syntax errors before the extraction
 674         self.format_selector = (
 675             self.params.get('format') if self.params.get('format') in (None, '-')
 676             else self.params['format'] if callable(self.params['format'])
 677             else self.build_format_selector(self.params['format']))
 678
 679         # Set http_headers defaults according to std_headers
 680         self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
 681
 682         hooks = {
 683             'post_hooks': self.add_post_hook,
 684             'progress_hooks': self.add_progress_hook,
 685             'postprocessor_hooks': self.add_postprocessor_hook,
 686         }
 687         for opt, fn in hooks.items():
 688             for ph in self.params.get(opt, []):
 689                 fn(ph)
 690
 691         for pp_def_raw in self.params.get('postprocessors', []):
 692             pp_def = dict(pp_def_raw)
 693             when = pp_def.pop('when', 'post_process')
 694             self.add_post_processor(
 695                 get_postprocessor(pp_def.pop('key'))(self, **pp_def),
 696                 when=when)
 697
 698         self._setup_opener()
 699         register_socks_protocols()
 700
 701         def preload_download_archive(fn):
 702             """Preload the archive, if any is specified"""
 703             if fn is None:
 704                 return False
 705             self.write_debug(f'Loading archive file {fn!r}')
 706             try:
 707                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 708                     for line in archive_file:
 709                         self.archive.add(line.strip())
 710             except OSError as ioe:
 711                 if ioe.errno != errno.ENOENT:
 712                     raise
 713                 return False
 714             return True
 715
 716         self.archive = set()
 717         preload_download_archive(self.params.get('download_archive'))
 718
 719     def warn_if_short_id(self, argv):
 720         # short YouTube ID starting with dash?
 721         idxs = [
 722             i for i, a in enumerate(argv)
 723             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 724         if idxs:
 725             correct_argv = (
 726                 ['yt-dlp']
 727                 + [a for i, a in enumerate(argv) if i not in idxs]
 728                 + ['--'] + [argv[i] for i in idxs]
 729             )
 730             self.report_warning(
 731                 'Long argument string detected. '
 732                 'Use -- to separate parameters and URLs, like this:\n%s' %
 733                 args_to_str(correct_argv))
 734
 735     def add_info_extractor(self, ie):
 736         """Add an InfoExtractor object to the end of the list."""
 737         ie_key = ie.ie_key()
 738         self._ies[ie_key] = ie
 739         if not isinstance(ie, type):
 740             self._ies_instances[ie_key] = ie
 741             ie.set_downloader(self)
 742
 743     def _get_info_extractor_class(self, ie_key):
 744         ie = self._ies.get(ie_key)
 745         if ie is None:
 746             ie = get_info_extractor(ie_key)
 747             self.add_info_extractor(ie)
 748         return ie
 749
 750     def get_info_extractor(self, ie_key):
 751         """
 752         Get an instance of an IE with name ie_key, it will try to get one from
 753         the _ies list, if there's no instance it will create a new one and add
 754         it to the extractor list.
 755         """
 756         ie = self._ies_instances.get(ie_key)
 757         if ie is None:
 758             ie = get_info_extractor(ie_key)()
 759             self.add_info_extractor(ie)
 760         return ie
 761
 762     def add_default_info_extractors(self):
 763         """
 764         Add the InfoExtractors returned by gen_extractors to the end of the list
 765         """
 766         for ie in gen_extractor_classes():
 767             self.add_info_extractor(ie)
 768
 769     def add_post_processor(self, pp, when='post_process'):
 770         """Add a PostProcessor object to the end of the chain."""
 771         assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
 772         self._pps[when].append(pp)
 773         pp.set_downloader(self)
 774
 775     def add_post_hook(self, ph):
 776         """Add the post hook"""
 777         self._post_hooks.append(ph)
 778
 779     def add_progress_hook(self, ph):
 780         """Add the download progress hook"""
 781         self._progress_hooks.append(ph)
 782
 783     def add_postprocessor_hook(self, ph):
 784         """Add the postprocessing progress hook"""
 785         self._postprocessor_hooks.append(ph)
 786         for pps in self._pps.values():
 787             for pp in pps:
 788                 pp.add_progress_hook(ph)
 789
 790     def _bidi_workaround(self, message):
 791         if not hasattr(self, '_output_channel'):
 792             return message
 793
 794         assert hasattr(self, '_output_process')
 795         assert isinstance(message, compat_str)
 796         line_count = message.count('\n') + 1
 797         self._output_process.stdin.write((message + '\n').encode())
 798         self._output_process.stdin.flush()
 799         res = ''.join(self._output_channel.readline().decode()
 800                       for _ in range(line_count))
 801         return res[:-len('\n')]
 802
 803     def _write_string(self, message, out=None, only_once=False):
 804         if only_once:
 805             if message in self._printed_messages:
 806                 return
 807             self._printed_messages.add(message)
 808         write_string(message, out=out, encoding=self.params.get('encoding'))
 809
 810     def to_stdout(self, message, skip_eol=False, quiet=None):
 811         """Print message to stdout"""
 812         if quiet is not None:
 813             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
 814         if skip_eol is not False:
 815             self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
 816         self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
 817
 818     def to_screen(self, message, skip_eol=False, quiet=None):
 819         """Print message to screen if not in quiet mode"""
 820         if self.params.get('logger'):
 821             self.params['logger'].debug(message)
 822             return
 823         if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
 824             return
 825         self._write_string(
 826             '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 827             self._out_files.screen)
 828
 829     def to_stderr(self, message, only_once=False):
 830         """Print message to stderr"""
 831         assert isinstance(message, compat_str)
 832         if self.params.get('logger'):
 833             self.params['logger'].error(message)
 834         else:
 835             self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
 836
 837     def _send_console_code(self, code):
 838         if compat_os_name == 'nt' or not self._out_files.console:
 839             return
 840         self._write_string(code, self._out_files.console)
 841
 842     def to_console_title(self, message):
 843         if not self.params.get('consoletitle', False):
 844             return
 845         message = remove_terminal_sequences(message)
 846         if compat_os_name == 'nt':
 847             if ctypes.windll.kernel32.GetConsoleWindow():
 848                 # c_wchar_p() might not be necessary if `message` is
 849                 # already of type unicode()
 850                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 851         else:
 852             self._send_console_code(f'\033]0;{message}\007')
 853
 854     def save_console_title(self):
 855         if not self.params.get('consoletitle') or self.params.get('simulate'):
 856             return
 857         self._send_console_code('\033[22;0t')  # Save the title on stack
 858
 859     def restore_console_title(self):
 860         if not self.params.get('consoletitle') or self.params.get('simulate'):
 861             return
 862         self._send_console_code('\033[23;0t')  # Restore the title from stack
 863
 864     def __enter__(self):
 865         self.save_console_title()
 866         return self
 867
 868     def __exit__(self, *args):
 869         self.restore_console_title()
 870
 871         if self.params.get('cookiefile') is not None:
 872             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 873
 874     def trouble(self, message=None, tb=None, is_error=True):
 875         """Determine action to take when a download problem appears.
 876
 877         Depending on if the downloader has been configured to ignore
 878         download errors or not, this method may throw an exception or
 879         not when errors are found, after printing the message.
 880
 881         @param tb          If given, is additional traceback information
 882         @param is_error    Whether to raise error according to ignorerrors
 883         """
 884         if message is not None:
 885             self.to_stderr(message)
 886         if self.params.get('verbose'):
 887             if tb is None:
 888                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 889                     tb = ''
 890                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 891                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 892                     tb += encode_compat_str(traceback.format_exc())
 893                 else:
 894                     tb_data = traceback.format_list(traceback.extract_stack())
 895                     tb = ''.join(tb_data)
 896             if tb:
 897                 self.to_stderr(tb)
 898         if not is_error:
 899             return
 900         if not self.params.get('ignoreerrors'):
 901             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 902                 exc_info = sys.exc_info()[1].exc_info
 903             else:
 904                 exc_info = sys.exc_info()
 905             raise DownloadError(message, exc_info)
 906         self._download_retcode = 1
 907
    # Named text styles. The values are passed as the format argument of the
    # _format_* helpers, e.g. report_warning styles its "WARNING:" prefix with Styles.WARNING
    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        WARNING='yellow',
        SUPPRESS='light black',
    )
 918
 919     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 920         text = str(text)
 921         if test_encoding:
 922             original_text = text
 923             # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
 924             encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
 925             text = text.encode(encoding, 'ignore').decode(encoding)
 926             if fallback is not None and text != original_text:
 927                 text = fallback
 928         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 929
 930     def _format_out(self, *args, **kwargs):
 931         return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
 932
 933     def _format_screen(self, *args, **kwargs):
 934         return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
 935
 936     def _format_err(self, *args, **kwargs):
 937         return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
 938
 939     def report_warning(self, message, only_once=False):
 940         '''
 941         Print the message to stderr, it will be prefixed with 'WARNING:'
 942         If stderr is a tty file the 'WARNING:' will be colored
 943         '''
 944         if self.params.get('logger') is not None:
 945             self.params['logger'].warning(message)
 946         else:
 947             if self.params.get('no_warnings'):
 948                 return
 949             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 950
 951     def deprecation_warning(self, message):
 952         if self.params.get('logger') is not None:
 953             self.params['logger'].warning(f'DeprecationWarning: {message}')
 954         else:
 955             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 956
 957     def report_error(self, message, *args, **kwargs):
 958         '''
 959         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 960         in red if stderr is a tty file.
 961         '''
 962         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 963
 964     def write_debug(self, message, only_once=False):
 965         '''Log debug message or Print message to stderr'''
 966         if not self.params.get('verbose', False):
 967             return
 968         message = f'[debug] {message}'
 969         if self.params.get('logger'):
 970             self.params['logger'].debug(message)
 971         else:
 972             self.to_stderr(message, only_once)
 973
 974     def report_file_already_downloaded(self, file_name):
 975         """Report file has already been fully downloaded."""
 976         try:
 977             self.to_screen('[download] %s has already been downloaded' % file_name)
 978         except UnicodeEncodeError:
 979             self.to_screen('[download] The file has already been downloaded')
 980
 981     def report_file_delete(self, file_name):
 982         """Report that existing file will be deleted."""
 983         try:
 984             self.to_screen('Deleting existing file %s' % file_name)
 985         except UnicodeEncodeError:
 986             self.to_screen('Deleting existing file')
 987
 988     def raise_no_formats(self, info, forced=False, *, msg=None):
 989         has_drm = info.get('_has_drm')
 990         ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
 991         msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
 992         if forced or not ignored:
 993             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 994                                  expected=has_drm or ignored or expected)
 995         else:
 996             self.report_warning(msg)
 997
 998     def parse_outtmpl(self):
 999         self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
1000         self._parse_outtmpl()
1001         return self.params['outtmpl']
1002
1003     def _parse_outtmpl(self):
1004         sanitize = IDENTITY
1005         if self.params.get('restrictfilenames'):  # Remove spaces in the default template
1006             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
1007
1008         outtmpl = self.params.setdefault('outtmpl', {})
1009         if not isinstance(outtmpl, dict):
1010             self.params['outtmpl'] = outtmpl = {'default': outtmpl}
1011         outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})
1012
1013     def get_output_path(self, dir_type='', filename=None):
1014         paths = self.params.get('paths', {})
1015         assert isinstance(paths, dict)
1016         path = os.path.join(
1017             expand_path(paths.get('home', '').strip()),
1018             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1019             filename or '')
1020         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1021
1022     @staticmethod
1023     def _outtmpl_expandpath(outtmpl):
1024         # expand_path translates '%%' into '%' and '$$' into '$'
1025         # correspondingly that is not what we want since we need to keep
1026         # '%%' intact for template dict substitution step. Working around
1027         # with boundary-alike separator hack.
1028         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1029         outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
1030
1031         # outtmpl should be expand_path'ed before template dict substitution
1032         # because meta fields may contain env variables we don't want to
1033         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1034         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1035         return expand_path(outtmpl).replace(sep, '')
1036
1037     @staticmethod
1038     def escape_outtmpl(outtmpl):
1039         ''' Escape any remaining strings like %s, %abc% etc. '''
1040         return re.sub(
1041             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1042             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1043             outtmpl)
1044
1045     @classmethod
1046     def validate_outtmpl(cls, outtmpl):
1047         ''' @return None or Exception object '''
1048         outtmpl = re.sub(
1049             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1050             lambda mobj: f'{mobj.group(0)[:-1]}s',
1051             cls._outtmpl_expandpath(outtmpl))
1052         try:
1053             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1054             return None
1055         except ValueError as err:
1056             return err
1057
1058     @staticmethod
1059     def _copy_infodict(info_dict):
1060         info_dict = dict(info_dict)
1061         info_dict.pop('__postprocessors', None)
1062         info_dict.pop('__pending_error', None)
1063         return info_dict
1064
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param outtmpl     The output template to process
        @param info_dict   The info dict to take the values from. If it has no 'epoch',
                           one is set on this dict itself; it is not modified otherwise
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        @return            A tuple of the rewritten template and the dict of values
                           to substitute into it
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        # From here on, a shallow copy is used so that the computed fields below stay local
        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Maps each key generated by create_key to its computed value; returned with the template
        TMPL_DICT = {}
        # Matches the conversions that create_key handles: the types in STR_FORMAT_TYPES
        # plus the custom ones l, j, q, B, U, D and S
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        # The operators supported in the maths part of a field expression
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Parses the expression inside %(...): optional negation, the field, maths,
        # a strftime format after ">", then the alternate (","), replacement ("&") and default ("|")
        INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')

        # Look up a dotted field path in the (copied) info dict; a leading empty key is dropped
        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        # Compute the value of one parsed field expression (a groupdict of INTERNAL_FORMAT_RE).
        # Returns None if the maths cannot be applied
        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Consume the maths string piece by piece, alternating operator and operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is a number, or else the name of a field holding one
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder used when a field has no value and no default is given in the template
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        # Default sanitizer. Whether the value is treated as an ID is derived from the key
        # only with the 'filename-sanitization' compat option; otherwise NO_DEFAULT is passed
        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        # A callable passed as `sanitize` replaces the default sanitizer;
        # from here on `sanitize` itself only tells whether to sanitize at all
        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        # json.dumps fallback: sets and LazyList become lists, anything else its repr
        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        # Replacement callback for EXTERNAL_FORMAT_RE: computes the value of one conversion,
        # stores it in TMPL_DICT and returns the conversion rewritten to refer to that entry
        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            # Walk through the alternates until one of them yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = f'0{field_size_compat_map[key]:d}d'

            # No value: use the default. Otherwise the replacement (if any) takes the place of the value
            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            # The same format with its conversion type replaced by 's'
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                # The format is applied to the encoded value; undecodable bytes are then dropped
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                # Only the first character of a truthy value is used; otherwise format as a string
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            # The dict key combines the expression and the original format, joined by NUL;
            # a NUL is also put after every '%' in the expression.
            # NOTE(review): presumably so that escape_outtmpl, whose pattern skips a '%'
            # followed by NUL, leaves the key alone - confirm
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1241
1242     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1243         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1244         return self.escape_outtmpl(outtmpl) % info_dict
1245
1246     def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1247         assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1248         if outtmpl is None:
1249             outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1250         try:
1251             outtmpl = self._outtmpl_expandpath(outtmpl)
1252             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1253             if not filename:
1254                 return None
1255
1256             if tmpl_type in ('', 'temp'):
1257                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1258                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1259                     filename = replace_extension(filename, ext, final_ext)
1260             elif tmpl_type:
1261                 force_ext = OUTTMPL_TYPES[tmpl_type]
1262                 if force_ext:
1263                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1264
1265             # https://github.com/blackjack4494/youtube-dlc/issues/85
1266             trim_file_name = self.params.get('trim_file_name', False)
1267             if trim_file_name:
1268                 no_ext, *ext = filename.rsplit('.', 2)
1269                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1270
1271             return filename
1272         except ValueError as err:
1273             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1274             return None
1275
1276     def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1277         """Generate the output filename"""
1278         if outtmpl:
1279             assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1280             dir_type = None
1281         filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1282         if not filename and dir_type not in ('', 'temp'):
1283             return ''
1284
1285         if warn:
1286             if not self.params.get('paths'):
1287                 pass
1288             elif filename == '-':
1289                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1290             elif os.path.isabs(filename):
1291                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1292         if filename == '-' or not filename:
1293             return filename
1294
1295         return self.get_output_path(dir_type, filename)
1296
1297     def _match_entry(self, info_dict, incomplete=False, silent=False):
1298         """ Returns None if the file should be downloaded """
1299
1300         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1301
1302         def check_filter():
1303             if 'title' in info_dict:
1304                 # This can happen when we're just evaluating the playlist
1305                 title = info_dict['title']
1306                 matchtitle = self.params.get('matchtitle', False)
1307                 if matchtitle:
1308                     if not re.search(matchtitle, title, re.IGNORECASE):
1309                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1310                 rejecttitle = self.params.get('rejecttitle', False)
1311                 if rejecttitle:
1312                     if re.search(rejecttitle, title, re.IGNORECASE):
1313                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1314             date = info_dict.get('upload_date')
1315             if date is not None:
1316                 dateRange = self.params.get('daterange', DateRange())
1317                 if date not in dateRange:
1318                     return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1319             view_count = info_dict.get('view_count')
1320             if view_count is not None:
1321                 min_views = self.params.get('min_views')
1322                 if min_views is not None and view_count < min_views:
1323                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1324                 max_views = self.params.get('max_views')
1325                 if max_views is not None and view_count > max_views:
1326                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1327             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1328                 return 'Skipping "%s" because it is age restricted' % video_title
1329
1330             match_filter = self.params.get('match_filter')
1331             if match_filter is not None:
1332                 try:
1333                     ret = match_filter(info_dict, incomplete=incomplete)
1334                 except TypeError:
1335                     # For backward compatibility
1336                     ret = None if incomplete else match_filter(info_dict)
1337                 if ret is NO_DEFAULT:
1338                     while True:
1339                         filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1340                         reply = input(self._format_screen(
1341                             f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1342                         if reply in {'y', ''}:
1343                             return None
1344                         elif reply == 'n':
1345                             return f'Skipping {video_title}'
1346                 elif ret is not None:
1347                     return ret
1348             return None
1349
1350         if self.in_download_archive(info_dict):
1351             reason = '%s has already been recorded in the archive' % video_title
1352             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1353         else:
1354             reason = check_filter()
1355             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1356         if reason is not None:
1357             if not silent:
1358                 self.to_screen('[download] ' + reason)
1359             if self.params.get(break_opt, False):
1360                 raise break_err()
1361         return reason
1362
1363     @staticmethod
1364     def add_extra_info(info_dict, extra_info):
1365         '''Set the keys from extra_info in info dict if they are missing'''
1366         for key, value in extra_info.items():
1367             info_dict.setdefault(key, value)
1368
1369     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1370                      process=True, force_generic_extractor=False):
1371         """
1372         Return a list with a dictionary for each video extracted.
1373
1374         Arguments:
1375         url -- URL to extract
1376
1377         Keyword arguments:
1378         download -- whether to download videos during extraction
1379         ie_key -- extractor key hint
1380         extra_info -- dictionary containing the extra values to add to each result
1381         process -- whether to resolve all unresolved references (URLs, playlist items),
1382             must be True for download to work.
1383         force_generic_extractor -- force using the generic extractor
1384         """
1385
1386         if extra_info is None:
1387             extra_info = {}
1388
1389         if not ie_key and force_generic_extractor:
1390             ie_key = 'Generic'
1391
1392         if ie_key:
1393             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1394         else:
1395             ies = self._ies
1396
1397         for ie_key, ie in ies.items():
1398             if not ie.suitable(url):
1399                 continue
1400
1401             if not ie.working():
1402                 self.report_warning('The program functionality for this site has been marked as broken, '
1403                                     'and will probably not work.')
1404
1405             temp_id = ie.get_temp_id(url)
1406             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1407                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1408                 if self.params.get('break_on_existing', False):
1409                     raise ExistingVideoReached()
1410                 break
1411             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1412         else:
1413             self.report_error('no suitable InfoExtractor for URL %s' % url)
1414
1415     def _handle_extraction_exceptions(func):
1416         @functools.wraps(func)
1417         def wrapper(self, *args, **kwargs):
1418             while True:
1419                 try:
1420                     return func(self, *args, **kwargs)
1421                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1422                     raise
1423                 except ReExtractInfo as e:
1424                     if e.expected:
1425                         self.to_screen(f'{e}; Re-extracting data')
1426                     else:
1427                         self.to_stderr('\r')
1428                         self.report_warning(f'{e}; Re-extracting data')
1429                     continue
1430                 except GeoRestrictedError as e:
1431                     msg = e.msg
1432                     if e.countries:
1433                         msg += '\nThis video is available in %s.' % ', '.join(
1434                             map(ISO3166Utils.short2full, e.countries))
1435                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1436                     self.report_error(msg)
1437                 except ExtractorError as e:  # An error we somewhat expected
1438                     self.report_error(str(e), e.format_traceback())
1439                 except Exception as e:
1440                     if self.params.get('ignoreerrors'):
1441                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1442                     else:
1443                         raise
1444                 break
1445         return wrapper
1446
1447     def _wait_for_video(self, ie_result):
1448         if (not self.params.get('wait_for_video')
1449                 or ie_result.get('_type', 'video') != 'video'
1450                 or ie_result.get('formats') or ie_result.get('url')):
1451             return
1452
1453         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1454         last_msg = ''
1455
1456         def progress(msg):
1457             nonlocal last_msg
1458             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1459             last_msg = msg
1460
1461         min_wait, max_wait = self.params.get('wait_for_video')
1462         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1463         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1464             diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1465             self.report_warning('Release time of video is not known')
1466         elif (diff or 0) <= 0:
1467             self.report_warning('Video should already be available according to extracted info')
1468         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1469         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1470
1471         wait_till = time.time() + diff
1472         try:
1473             while True:
1474                 diff = wait_till - time.time()
1475                 if diff <= 0:
1476                     progress('')
1477                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1478                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1479                 time.sleep(1)
1480         except KeyboardInterrupt:
1481             progress('')
1482             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1483         except BaseException as e:
1484             if not isinstance(e, ReExtractInfo):
1485                 self.to_screen('')
1486             raise
1487
1488     @_handle_extraction_exceptions
1489     def __extract_info(self, url, ie, download, extra_info, process):
1490         ie_result = ie.extract(url)
1491         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1492             return
1493         if isinstance(ie_result, list):
1494             # Backwards compatibility: old IE result format
1495             ie_result = {
1496                 '_type': 'compat_list',
1497                 'entries': ie_result,
1498             }
1499         if extra_info.get('original_url'):
1500             ie_result.setdefault('original_url', extra_info['original_url'])
1501         self.add_default_extra_info(ie_result, ie, url)
1502         if process:
1503             self._wait_for_video(ie_result)
1504             return self.process_ie_result(ie_result, download, extra_info)
1505         else:
1506             return ie_result
1507
1508     def add_default_extra_info(self, ie_result, ie, url):
1509         if url is not None:
1510             self.add_extra_info(ie_result, {
1511                 'webpage_url': url,
1512                 'original_url': url,
1513             })
1514         webpage_url = ie_result.get('webpage_url')
1515         if webpage_url:
1516             self.add_extra_info(ie_result, {
1517                 'webpage_url_basename': url_basename(webpage_url),
1518                 'webpage_url_domain': get_domain(webpage_url),
1519             })
1520         if ie is not None:
1521             self.add_extra_info(ie_result, {
1522                 'extractor': ie.IE_NAME,
1523                 'extractor_key': ie.ie_key(),
1524             })
1525
1526     def process_ie_result(self, ie_result, download=True, extra_info=None):
1527         """
1528         Take the result of the ie(may be modified) and resolve all unresolved
1529         references (URLs, playlist items).
1530
1531         It will also download the videos if 'download'.
1532         Returns the resolved ie_result.
1533         """
1534         if extra_info is None:
1535             extra_info = {}
1536         result_type = ie_result.get('_type', 'video')
1537
1538         if result_type in ('url', 'url_transparent'):
1539             ie_result['url'] = sanitize_url(ie_result['url'])
1540             if ie_result.get('original_url'):
1541                 extra_info.setdefault('original_url', ie_result['original_url'])
1542
1543             extract_flat = self.params.get('extract_flat', False)
1544             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1545                     or extract_flat is True):
1546                 info_copy = ie_result.copy()
1547                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1548                 if ie and not ie_result.get('id'):
1549                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1550                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1551                 self.add_extra_info(info_copy, extra_info)
1552                 info_copy, _ = self.pre_process(info_copy)
1553                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1554                 self._raise_pending_errors(info_copy)
1555                 if self.params.get('force_write_download_archive', False):
1556                     self.record_download_archive(info_copy)
1557                 return ie_result
1558
1559         if result_type == 'video':
1560             self.add_extra_info(ie_result, extra_info)
1561             ie_result = self.process_video_result(ie_result, download=download)
1562             self._raise_pending_errors(ie_result)
1563             additional_urls = (ie_result or {}).get('additional_urls')
1564             if additional_urls:
1565                 # TODO: Improve MetadataParserPP to allow setting a list
1566                 if isinstance(additional_urls, compat_str):
1567                     additional_urls = [additional_urls]
1568                 self.to_screen(
1569                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1570                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1571                 ie_result['additional_entries'] = [
1572                     self.extract_info(
1573                         url, download, extra_info=extra_info,
1574                         force_generic_extractor=self.params.get('force_generic_extractor'))
1575                     for url in additional_urls
1576                 ]
1577             return ie_result
1578         elif result_type == 'url':
1579             # We have to add extra_info to the results because it may be
1580             # contained in a playlist
1581             return self.extract_info(
1582                 ie_result['url'], download,
1583                 ie_key=ie_result.get('ie_key'),
1584                 extra_info=extra_info)
1585         elif result_type == 'url_transparent':
1586             # Use the information from the embedding page
1587             info = self.extract_info(
1588                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1589                 extra_info=extra_info, download=False, process=False)
1590
1591             # extract_info may return None when ignoreerrors is enabled and
1592             # extraction failed with an error, don't crash and return early
1593             # in this case
1594             if not info:
1595                 return info
1596
1597             exempted_fields = {'_type', 'url', 'ie_key'}
1598             if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1599                 # For video clips, the id etc of the clip extractor should be used
1600                 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1601
1602             new_result = info.copy()
1603             new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1604
1605             # Extracted info may not be a video result (i.e.
1606             # info.get('_type', 'video') != video) but rather an url or
1607             # url_transparent. In such cases outer metadata (from ie_result)
1608             # should be propagated to inner one (info). For this to happen
1609             # _type of info should be overridden with url_transparent. This
1610             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1611             if new_result.get('_type') == 'url':
1612                 new_result['_type'] = 'url_transparent'
1613
1614             return self.process_ie_result(
1615                 new_result, download=download, extra_info=extra_info)
1616         elif result_type in ('playlist', 'multi_video'):
1617             # Protect from infinite recursion due to recursively nested playlists
1618             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1619             webpage_url = ie_result['webpage_url']
1620             if webpage_url in self._playlist_urls:
1621                 self.to_screen(
1622                     '[download] Skipping already downloaded playlist: %s'
1623                     % ie_result.get('title') or ie_result.get('id'))
1624                 return
1625
1626             self._playlist_level += 1
1627             self._playlist_urls.add(webpage_url)
1628             self._fill_common_fields(ie_result, False)
1629             self._sanitize_thumbnails(ie_result)
1630             try:
1631                 return self.__process_playlist(ie_result, download)
1632             finally:
1633                 self._playlist_level -= 1
1634                 if not self._playlist_level:
1635                     self._playlist_urls.clear()
1636         elif result_type == 'compat_list':
1637             self.report_warning(
1638                 'Extractor %s returned a compat_list result. '
1639                 'It needs to be updated.' % ie_result.get('extractor'))
1640
1641             def _fixup(r):
1642                 self.add_extra_info(r, {
1643                     'extractor': ie_result['extractor'],
1644                     'webpage_url': ie_result['webpage_url'],
1645                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1646                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1647                     'extractor_key': ie_result['extractor_key'],
1648                 })
1649                 return r
1650             ie_result['entries'] = [
1651                 self.process_ie_result(_fixup(r), download, extra_info)
1652                 for r in ie_result['entries']
1653             ]
1654             return ie_result
1655         else:
1656             raise Exception('Invalid result type: %s' % result_type)
1657
1658     def _ensure_dir_exists(self, path):
1659         return make_dir(path, self.report_error)
1660
1661     @staticmethod
1662     def _playlist_infodict(ie_result, **kwargs):
1663         return {
1664             **ie_result,
1665             'playlist': ie_result.get('title') or ie_result.get('id'),
1666             'playlist_id': ie_result.get('id'),
1667             'playlist_title': ie_result.get('title'),
1668             'playlist_uploader': ie_result.get('uploader'),
1669             'playlist_uploader_id': ie_result.get('uploader_id'),
1670             'playlist_index': 0,
1671             **kwargs,
1672         }
1673
1674     def __process_playlist(self, ie_result, download):
1675         """Process each entry in the playlist"""
1676         title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
1677         self.to_screen(f'[download] Downloading playlist: {title}')
1678
1679         all_entries = PlaylistEntries(self, ie_result)
1680         entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1681
1682         lazy = self.params.get('lazy_playlist')
1683         if lazy:
1684             resolved_entries, n_entries = [], 'N/A'
1685             ie_result['requested_entries'], ie_result['entries'] = None, None
1686         else:
1687             entries = resolved_entries = list(entries)
1688             n_entries = len(resolved_entries)
1689             ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1690         if not ie_result.get('playlist_count'):
1691             # Better to do this after potentially exhausting entries
1692             ie_result['playlist_count'] = all_entries.get_full_count()
1693
1694         _infojson_written = False
1695         write_playlist_files = self.params.get('allow_playlist_files', True)
1696         if write_playlist_files and self.params.get('list_thumbnails'):
1697             self.list_thumbnails(ie_result)
1698         if write_playlist_files and not self.params.get('simulate'):
1699             ie_copy = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1700             _infojson_written = self._write_info_json(
1701                 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1702             if _infojson_written is None:
1703                 return
1704             if self._write_description('playlist', ie_result,
1705                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1706                 return
1707             # TODO: This should be passed to ThumbnailsConvertor if necessary
1708             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1709
1710         if lazy:
1711             if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1712                 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1713         elif self.params.get('playlistreverse'):
1714             entries.reverse()
1715         elif self.params.get('playlistrandom'):
1716             random.shuffle(entries)
1717
1718         self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
1719                        f'{format_field(ie_result, "playlist_count", " of %s")}')
1720
1721         failures = 0
1722         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1723         for i, (playlist_index, entry) in enumerate(entries):
1724             if lazy:
1725                 resolved_entries.append((playlist_index, entry))
1726
1727             # TODO: Add auto-generated fields
1728             if not entry or self._match_entry(entry, incomplete=True) is not None:
1729                 continue
1730
1731             self.to_screen('[download] Downloading video %s of %s' % (
1732                 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1733
1734             entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
1735             if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
1736                 playlist_index = ie_result['requested_entries'][i]
1737
1738             entry_result = self.__process_iterable_entry(entry, download, {
1739                 'n_entries': int_or_none(n_entries),
1740                 '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
1741                 'playlist_count': ie_result.get('playlist_count'),
1742                 'playlist_index': playlist_index,
1743                 'playlist_autonumber': i + 1,
1744                 'playlist': title,
1745                 'playlist_id': ie_result.get('id'),
1746                 'playlist_title': ie_result.get('title'),
1747                 'playlist_uploader': ie_result.get('uploader'),
1748                 'playlist_uploader_id': ie_result.get('uploader_id'),
1749                 'extractor': ie_result['extractor'],
1750                 'webpage_url': ie_result['webpage_url'],
1751                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1752                 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1753                 'extractor_key': ie_result['extractor_key'],
1754             })
1755             if not entry_result:
1756                 failures += 1
1757             if failures >= max_failures:
1758                 self.report_error(
1759                     f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
1760                 break
1761             resolved_entries[i] = (playlist_index, entry_result)
1762
1763         # Update with processed data
1764         ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1765
1766         # Write the updated info to json
1767         if _infojson_written is True and self._write_info_json(
1768                 'updated playlist', ie_result,
1769                 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1770             return
1771
1772         ie_result = self.run_all_pps('playlist', ie_result)
1773         self.to_screen(f'[download] Finished downloading playlist: {title}')
1774         return ie_result
1775
1776     @_handle_extraction_exceptions
1777     def __process_iterable_entry(self, entry, download, extra_info):
1778         return self.process_ie_result(
1779             entry, download=download, extra_info=extra_info)
1780
1781     def _build_format_filter(self, filter_spec):
1782         " Returns a function to filter the formats according to the filter_spec "
1783
1784         OPERATORS = {
1785             '<': operator.lt,
1786             '<=': operator.le,
1787             '>': operator.gt,
1788             '>=': operator.ge,
1789             '=': operator.eq,
1790             '!=': operator.ne,
1791         }
1792         operator_rex = re.compile(r'''(?x)\s*
1793             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1794             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1795             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1796             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1797         m = operator_rex.fullmatch(filter_spec)
1798         if m:
1799             try:
1800                 comparison_value = int(m.group('value'))
1801             except ValueError:
1802                 comparison_value = parse_filesize(m.group('value'))
1803                 if comparison_value is None:
1804                     comparison_value = parse_filesize(m.group('value') + 'B')
1805                 if comparison_value is None:
1806                     raise ValueError(
1807                         'Invalid value %r in format specification %r' % (
1808                             m.group('value'), filter_spec))
1809             op = OPERATORS[m.group('op')]
1810
1811         if not m:
1812             STR_OPERATORS = {
1813                 '=': operator.eq,
1814                 '^=': lambda attr, value: attr.startswith(value),
1815                 '$=': lambda attr, value: attr.endswith(value),
1816                 '*=': lambda attr, value: value in attr,
1817                 '~=': lambda attr, value: value.search(attr) is not None
1818             }
1819             str_operator_rex = re.compile(r'''(?x)\s*
1820                 (?P<key>[a-zA-Z0-9._-]+)\s*
1821                 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1822                 (?P<quote>["'])?
1823                 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1824                 (?(quote)(?P=quote))\s*
1825                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1826             m = str_operator_rex.fullmatch(filter_spec)
1827             if m:
1828                 if m.group('op') == '~=':
1829                     comparison_value = re.compile(m.group('value'))
1830                 else:
1831                     comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1832                 str_op = STR_OPERATORS[m.group('op')]
1833                 if m.group('negation'):
1834                     op = lambda attr, value: not str_op(attr, value)
1835                 else:
1836                     op = str_op
1837
1838         if not m:
1839             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1840
1841         def _filter(f):
1842             actual_value = f.get(m.group('key'))
1843             if actual_value is None:
1844                 return m.group('none_inclusive')
1845             return op(actual_value, comparison_value)
1846         return _filter
1847
1848     def _check_formats(self, formats):
1849         for f in formats:
1850             self.to_screen('[info] Testing format %s' % f['format_id'])
1851             path = self.get_output_path('temp')
1852             if not self._ensure_dir_exists(f'{path}/'):
1853                 continue
1854             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1855             temp_file.close()
1856             try:
1857                 success, _ = self.dl(temp_file.name, f, test=True)
1858             except (DownloadError, OSError, ValueError) + network_exceptions:
1859                 success = False
1860             finally:
1861                 if os.path.exists(temp_file.name):
1862                     try:
1863                         os.remove(temp_file.name)
1864                     except OSError:
1865                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1866             if success:
1867                 yield f
1868             else:
1869                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1870
1871     def _default_format_spec(self, info_dict, download=True):
1872
1873         def can_merge():
1874             merger = FFmpegMergerPP(self)
1875             return merger.available and merger.can_merge()
1876
1877         prefer_best = (
1878             not self.params.get('simulate')
1879             and download
1880             and (
1881                 not can_merge()
1882                 or info_dict.get('is_live') and not self.params.get('live_from_start')
1883                 or self.params['outtmpl']['default'] == '-'))
1884         compat = (
1885             prefer_best
1886             or self.params.get('allow_multiple_audio_streams', False)
1887             or 'format-spec' in self.params['compat_opts'])
1888
1889         return (
1890             'best/bestvideo+bestaudio' if prefer_best
1891             else 'bestvideo*+bestaudio/best' if not compat
1892             else 'bestvideo+bestaudio/best')
1893
1894     def build_format_selector(self, format_spec):
1895         def syntax_error(note, start):
1896             message = (
1897                 'Invalid format specification: '
1898                 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
1899             return SyntaxError(message)
1900
1901         PICKFIRST = 'PICKFIRST'
1902         MERGE = 'MERGE'
1903         SINGLE = 'SINGLE'
1904         GROUP = 'GROUP'
1905         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1906
1907         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1908                                   'video': self.params.get('allow_multiple_video_streams', False)}
1909
1910         check_formats = self.params.get('check_formats') == 'selected'
1911
1912         def _parse_filter(tokens):
1913             filter_parts = []
1914             for type, string, start, _, _ in tokens:
1915                 if type == tokenize.OP and string == ']':
1916                     return ''.join(filter_parts)
1917                 else:
1918                     filter_parts.append(string)
1919
1920         def _remove_unused_ops(tokens):
1921             # Remove operators that we don't use and join them with the surrounding strings
1922             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1923             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1924             last_string, last_start, last_end, last_line = None, None, None, None
1925             for type, string, start, end, line in tokens:
1926                 if type == tokenize.OP and string == '[':
1927                     if last_string:
1928                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1929                         last_string = None
1930                     yield type, string, start, end, line
1931                     # everything inside brackets will be handled by _parse_filter
1932                     for type, string, start, end, line in tokens:
1933                         yield type, string, start, end, line
1934                         if type == tokenize.OP and string == ']':
1935                             break
1936                 elif type == tokenize.OP and string in ALLOWED_OPS:
1937                     if last_string:
1938                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1939                         last_string = None
1940                     yield type, string, start, end, line
1941                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1942                     if not last_string:
1943                         last_string = string
1944                         last_start = start
1945                         last_end = end
1946                     else:
1947                         last_string += string
1948             if last_string:
1949                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1950
1951         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1952             selectors = []
1953             current_selector = None
1954             for type, string, start, _, _ in tokens:
1955                 # ENCODING is only defined in python 3.x
1956                 if type == getattr(tokenize, 'ENCODING', None):
1957                     continue
1958                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1959                     current_selector = FormatSelector(SINGLE, string, [])
1960                 elif type == tokenize.OP:
1961                     if string == ')':
1962                         if not inside_group:
1963                             # ')' will be handled by the parentheses group
1964                             tokens.restore_last_token()
1965                         break
1966                     elif inside_merge and string in ['/', ',']:
1967                         tokens.restore_last_token()
1968                         break
1969                     elif inside_choice and string == ',':
1970                         tokens.restore_last_token()
1971                         break
1972                     elif string == ',':
1973                         if not current_selector:
1974                             raise syntax_error('"," must follow a format selector', start)
1975                         selectors.append(current_selector)
1976                         current_selector = None
1977                     elif string == '/':
1978                         if not current_selector:
1979                             raise syntax_error('"/" must follow a format selector', start)
1980                         first_choice = current_selector
1981                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1982                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1983                     elif string == '[':
1984                         if not current_selector:
1985                             current_selector = FormatSelector(SINGLE, 'best', [])
1986                         format_filter = _parse_filter(tokens)
1987                         current_selector.filters.append(format_filter)
1988                     elif string == '(':
1989                         if current_selector:
1990                             raise syntax_error('Unexpected "("', start)
1991                         group = _parse_format_selection(tokens, inside_group=True)
1992                         current_selector = FormatSelector(GROUP, group, [])
1993                     elif string == '+':
1994                         if not current_selector:
1995                             raise syntax_error('Unexpected "+"', start)
1996                         selector_1 = current_selector
1997                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1998                         if not selector_2:
1999                             raise syntax_error('Expected a selector', start)
2000                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2001                     else:
2002                         raise syntax_error(f'Operator not recognized: "{string}"', start)
2003                 elif type == tokenize.ENDMARKER:
2004                     break
2005             if current_selector:
2006                 selectors.append(current_selector)
2007             return selectors
2008
2009         def _merge(formats_pair):
2010             format_1, format_2 = formats_pair
2011
2012             formats_info = []
2013             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2014             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2015
2016             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2017                 get_no_more = {'video': False, 'audio': False}
2018                 for (i, fmt_info) in enumerate(formats_info):
2019                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2020                         formats_info.pop(i)
2021                         continue
2022                     for aud_vid in ['audio', 'video']:
2023                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2024                             if get_no_more[aud_vid]:
2025                                 formats_info.pop(i)
2026                                 break
2027                             get_no_more[aud_vid] = True
2028
2029             if len(formats_info) == 1:
2030                 return formats_info[0]
2031
2032             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2033             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2034
2035             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2036             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2037
2038             output_ext = self.params.get('merge_output_format')
2039             if not output_ext:
2040                 if the_only_video:
2041                     output_ext = the_only_video['ext']
2042                 elif the_only_audio and not video_fmts:
2043                     output_ext = the_only_audio['ext']
2044                 else:
2045                     output_ext = 'mkv'
2046
2047             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2048
2049             new_dict = {
2050                 'requested_formats': formats_info,
2051                 'format': '+'.join(filtered('format')),
2052                 'format_id': '+'.join(filtered('format_id')),
2053                 'ext': output_ext,
2054                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2055                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2056                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2057                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2058                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2059             }
2060
2061             if the_only_video:
2062                 new_dict.update({
2063                     'width': the_only_video.get('width'),
2064                     'height': the_only_video.get('height'),
2065                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2066                     'fps': the_only_video.get('fps'),
2067                     'dynamic_range': the_only_video.get('dynamic_range'),
2068                     'vcodec': the_only_video.get('vcodec'),
2069                     'vbr': the_only_video.get('vbr'),
2070                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2071                 })
2072
2073             if the_only_audio:
2074                 new_dict.update({
2075                     'acodec': the_only_audio.get('acodec'),
2076                     'abr': the_only_audio.get('abr'),
2077                     'asr': the_only_audio.get('asr'),
2078                 })
2079
2080             return new_dict
2081
2082         def _check_formats(formats):
2083             if not check_formats:
2084                 yield from formats
2085                 return
2086             yield from self._check_formats(formats)
2087
2088         def _build_selector_function(selector):
2089             if isinstance(selector, list):  # ,
2090                 fs = [_build_selector_function(s) for s in selector]
2091
2092                 def selector_function(ctx):
2093                     for f in fs:
2094                         yield from f(ctx)
2095                 return selector_function
2096
2097             elif selector.type == GROUP:  # ()
2098                 selector_function = _build_selector_function(selector.selector)
2099
2100             elif selector.type == PICKFIRST:  # /
2101                 fs = [_build_selector_function(s) for s in selector.selector]
2102
2103                 def selector_function(ctx):
2104                     for f in fs:
2105                         picked_formats = list(f(ctx))
2106                         if picked_formats:
2107                             return picked_formats
2108                     return []
2109
2110             elif selector.type == MERGE:  # +
2111                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2112
2113                 def selector_function(ctx):
2114                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2115                         yield _merge(pair)
2116
2117             elif selector.type == SINGLE:  # atom
2118                 format_spec = selector.selector or 'best'
2119
2120                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2121                 if format_spec == 'all':
2122                     def selector_function(ctx):
2123                         yield from _check_formats(ctx['formats'][::-1])
2124                 elif format_spec == 'mergeall':
2125                     def selector_function(ctx):
2126                         formats = list(_check_formats(
2127                             f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2128                         if not formats:
2129                             return
2130                         merged_format = formats[-1]
2131                         for f in formats[-2::-1]:
2132                             merged_format = _merge((merged_format, f))
2133                         yield merged_format
2134
2135                 else:
2136                     format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
2137                     mobj = re.match(
2138                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2139                         format_spec)
2140                     if mobj is not None:
2141                         format_idx = int_or_none(mobj.group('n'), default=1)
2142                         format_reverse = mobj.group('bw')[0] == 'b'
2143                         format_type = (mobj.group('type') or [None])[0]
2144                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2145                         format_modified = mobj.group('mod') is not None
2146
2147                         format_fallback = not format_type and not format_modified  # for b, w
2148                         _filter_f = (
2149                             (lambda f: f.get('%scodec' % format_type) != 'none')
2150                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2151                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2152                             if format_type  # bv, ba, wv, wa
2153                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2154                             if not format_modified  # b, w
2155                             else lambda f: True)  # b*, w*
2156                         filter_f = lambda f: _filter_f(f) and (
2157                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2158                     else:
2159                         if format_spec in self._format_selection_exts['audio']:
2160                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2161                         elif format_spec in self._format_selection_exts['video']:
2162                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2163                             seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2164                         elif format_spec in self._format_selection_exts['storyboards']:
2165                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2166                         else:
2167                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2168
2169                     def selector_function(ctx):
2170                         formats = list(ctx['formats'])
2171                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2172                         if not matches:
2173                             if format_fallback and ctx['incomplete_formats']:
2174                                 # for extractors with incomplete formats (audio only (soundcloud)
2175                                 # or video only (imgur)) best/worst will fallback to
2176                                 # best/worst {video,audio}-only format
2177                                 matches = formats
2178                             elif seperate_fallback and not ctx['has_merged_format']:
2179                                 # for compatibility with youtube-dl when there is no pre-merged format
2180                                 matches = list(filter(seperate_fallback, formats))
2181                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2182                         try:
2183                             yield matches[format_idx - 1]
2184                         except LazyList.IndexError:
2185                             return
2186
2187             filters = [self._build_format_filter(f) for f in selector.filters]
2188
2189             def final_selector(ctx):
2190                 ctx_copy = dict(ctx)
2191                 for _filter in filters:
2192                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2193                 return selector_function(ctx_copy)
2194             return final_selector
2195
2196         stream = io.BytesIO(format_spec.encode())
2197         try:
2198             tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2199         except tokenize.TokenError:
2200             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2201
2202         class TokenIterator:
2203             def __init__(self, tokens):
2204                 self.tokens = tokens
2205                 self.counter = 0
2206
2207             def __iter__(self):
2208                 return self
2209
2210             def __next__(self):
2211                 if self.counter >= len(self.tokens):
2212                     raise StopIteration()
2213                 value = self.tokens[self.counter]
2214                 self.counter += 1
2215                 return value
2216
2217             next = __next__
2218
2219             def restore_last_token(self):
2220                 self.counter -= 1
2221
2222         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2223         return _build_selector_function(parsed_selector)
2224
2225     def _calc_headers(self, info_dict):
2226         res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2227
2228         cookies = self._calc_cookies(info_dict['url'])
2229         if cookies:
2230             res['Cookie'] = cookies
2231
2232         if 'X-Forwarded-For' not in res:
2233             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2234             if x_forwarded_for_ip:
2235                 res['X-Forwarded-For'] = x_forwarded_for_ip
2236
2237         return res
2238
2239     def _calc_cookies(self, url):
2240         pr = sanitized_Request(url)
2241         self.cookiejar.add_cookie_header(pr)
2242         return pr.get_header('Cookie')
2243
2244     def _sort_thumbnails(self, thumbnails):
2245         thumbnails.sort(key=lambda t: (
2246             t.get('preference') if t.get('preference') is not None else -1,
2247             t.get('width') if t.get('width') is not None else -1,
2248             t.get('height') if t.get('height') is not None else -1,
2249             t.get('id') if t.get('id') is not None else '',
2250             t.get('url')))
2251
2252     def _sanitize_thumbnails(self, info_dict):
2253         thumbnails = info_dict.get('thumbnails')
2254         if thumbnails is None:
2255             thumbnail = info_dict.get('thumbnail')
2256             if thumbnail:
2257                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2258         if not thumbnails:
2259             return
2260
2261         def check_thumbnails(thumbnails):
2262             for t in thumbnails:
2263                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2264                 try:
2265                     self.urlopen(HEADRequest(t['url']))
2266                 except network_exceptions as err:
2267                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2268                     continue
2269                 yield t
2270
2271         self._sort_thumbnails(thumbnails)
2272         for i, t in enumerate(thumbnails):
2273             if t.get('id') is None:
2274                 t['id'] = '%d' % i
2275             if t.get('width') and t.get('height'):
2276                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2277             t['url'] = sanitize_url(t['url'])
2278
2279         if self.params.get('check_formats') is True:
2280             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2281         else:
2282             info_dict['thumbnails'] = thumbnails
2283
2284     def _fill_common_fields(self, info_dict, is_video=True):
2285         # TODO: move sanitization here
2286         if is_video:
2287             # playlists are allowed to lack "title"
2288             title = info_dict.get('title', NO_DEFAULT)
2289             if title is NO_DEFAULT:
2290                 raise ExtractorError('Missing "title" field in extractor result',
2291                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2292             info_dict['fulltitle'] = title
2293             if not title:
2294                 if title == '':
2295                     self.write_debug('Extractor gave empty title. Creating a generic title')
2296                 else:
2297                     self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2298                 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2299
2300         if info_dict.get('duration') is not None:
2301             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2302
2303         for ts_key, date_key in (
2304                 ('timestamp', 'upload_date'),
2305                 ('release_timestamp', 'release_date'),
2306                 ('modified_timestamp', 'modified_date'),
2307         ):
2308             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2309                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2310                 # see http://bugs.python.org/issue1646728)
2311                 with contextlib.suppress(ValueError, OverflowError, OSError):
2312                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2313                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2314
2315         live_keys = ('is_live', 'was_live')
2316         live_status = info_dict.get('live_status')
2317         if live_status is None:
2318             for key in live_keys:
2319                 if info_dict.get(key) is False:
2320                     continue
2321                 if info_dict.get(key):
2322                     live_status = key
2323                 break
2324             if all(info_dict.get(key) is False for key in live_keys):
2325                 live_status = 'not_live'
2326         if live_status:
2327             info_dict['live_status'] = live_status
2328             for key in live_keys:
2329                 if info_dict.get(key) is None:
2330                     info_dict[key] = (live_status == key)
2331
2332         # Auto generate title fields corresponding to the *_number fields when missing
2333         # in order to always have clean titles. This is very common for TV series.
2334         for field in ('chapter', 'season', 'episode'):
2335             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2336                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2337
2338     def _raise_pending_errors(self, info):
2339         err = info.pop('__pending_error', None)
2340         if err:
2341             self.report_error(err, tb=False)
2342
2343     def process_video_result(self, info_dict, download=True):
2344         assert info_dict.get('_type', 'video') == 'video'
2345         self._num_videos += 1
2346
2347         if 'id' not in info_dict:
2348             raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2349         elif not info_dict.get('id'):
2350             raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2351
2352         def report_force_conversion(field, field_not, conversion):
2353             self.report_warning(
2354                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2355                 % (field, field_not, conversion))
2356
2357         def sanitize_string_field(info, string_field):
2358             field = info.get(string_field)
2359             if field is None or isinstance(field, compat_str):
2360                 return
2361             report_force_conversion(string_field, 'a string', 'string')
2362             info[string_field] = compat_str(field)
2363
2364         def sanitize_numeric_fields(info):
2365             for numeric_field in self._NUMERIC_FIELDS:
2366                 field = info.get(numeric_field)
2367                 if field is None or isinstance(field, (int, float)):
2368                     continue
2369                 report_force_conversion(numeric_field, 'numeric', 'int')
2370                 info[numeric_field] = int_or_none(field)
2371
2372         sanitize_string_field(info_dict, 'id')
2373         sanitize_numeric_fields(info_dict)
2374         if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2375             info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2376         if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2377             self.report_warning('"duration" field is negative, there is an error in extractor')
2378
2379         if 'playlist' not in info_dict:
2380             # It isn't part of a playlist
2381             info_dict['playlist'] = None
2382             info_dict['playlist_index'] = None
2383
2384         self._sanitize_thumbnails(info_dict)
2385
2386         thumbnail = info_dict.get('thumbnail')
2387         thumbnails = info_dict.get('thumbnails')
2388         if thumbnail:
2389             info_dict['thumbnail'] = sanitize_url(thumbnail)
2390         elif thumbnails:
2391             info_dict['thumbnail'] = thumbnails[-1]['url']
2392
2393         if info_dict.get('display_id') is None and 'id' in info_dict:
2394             info_dict['display_id'] = info_dict['id']
2395
2396         self._fill_common_fields(info_dict)
2397
2398         for cc_kind in ('subtitles', 'automatic_captions'):
2399             cc = info_dict.get(cc_kind)
2400             if cc:
2401                 for _, subtitle in cc.items():
2402                     for subtitle_format in subtitle:
2403                         if subtitle_format.get('url'):
2404                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2405                         if subtitle_format.get('ext') is None:
2406                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2407
2408         automatic_captions = info_dict.get('automatic_captions')
2409         subtitles = info_dict.get('subtitles')
2410
2411         info_dict['requested_subtitles'] = self.process_subtitles(
2412             info_dict['id'], subtitles, automatic_captions)
2413
2414         if info_dict.get('formats') is None:
2415             # There's only one format available
2416             formats = [info_dict]
2417         else:
2418             formats = info_dict['formats']
2419
2420         # or None ensures --clean-infojson removes it
2421         info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
2422         if not self.params.get('allow_unplayable_formats'):
2423             formats = [f for f in formats if not f.get('has_drm')]
2424             if info_dict['_has_drm'] and all(
2425                     f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2426                 self.report_warning(
2427                     'This video is DRM protected and only images are available for download. '
2428                     'Use --list-formats to see them')
2429
2430         get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2431         if not get_from_start:
2432             info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2433         if info_dict.get('is_live') and formats:
2434             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2435             if get_from_start and not formats:
2436                 self.raise_no_formats(info_dict, msg=(
2437                     '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2438                     'If you want to download from the current time, use --no-live-from-start'))
2439
2440         if not formats:
2441             self.raise_no_formats(info_dict)
2442
2443         def is_wellformed(f):
2444             url = f.get('url')
2445             if not url:
2446                 self.report_warning(
2447                     '"url" field is missing or empty - skipping format, '
2448                     'there is an error in extractor')
2449                 return False
2450             if isinstance(url, bytes):
2451                 sanitize_string_field(f, 'url')
2452             return True
2453
2454         # Filter out malformed formats for better extraction robustness
2455         formats = list(filter(is_wellformed, formats))
2456
2457         formats_dict = {}
2458
2459         # We check that all the formats have the format and format_id fields
2460         for i, format in enumerate(formats):
2461             sanitize_string_field(format, 'format_id')
2462             sanitize_numeric_fields(format)
2463             format['url'] = sanitize_url(format['url'])
2464             if not format.get('format_id'):
2465                 format['format_id'] = compat_str(i)
2466             else:
2467                 # Sanitize format_id from characters used in format selector expression
2468                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2469             format_id = format['format_id']
2470             if format_id not in formats_dict:
2471                 formats_dict[format_id] = []
2472             formats_dict[format_id].append(format)
2473
2474         # Make sure all formats have unique format_id
2475         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2476         for format_id, ambiguous_formats in formats_dict.items():
2477             ambigious_id = len(ambiguous_formats) > 1
2478             for i, format in enumerate(ambiguous_formats):
2479                 if ambigious_id:
2480                     format['format_id'] = '%s-%d' % (format_id, i)
2481                 if format.get('ext') is None:
2482                     format['ext'] = determine_ext(format['url']).lower()
2483                 # Ensure there is no conflict between id and ext in format selection
2484                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2485                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2486                     format['format_id'] = 'f%s' % format['format_id']
2487
2488         for i, format in enumerate(formats):
2489             if format.get('format') is None:
2490                 format['format'] = '{id} - {res}{note}'.format(
2491                     id=format['format_id'],
2492                     res=self.format_resolution(format),
2493                     note=format_field(format, 'format_note', ' (%s)'),
2494                 )
2495             if format.get('protocol') is None:
2496                 format['protocol'] = determine_protocol(format)
2497             if format.get('resolution') is None:
2498                 format['resolution'] = self.format_resolution(format, default=None)
2499             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2500                 format['dynamic_range'] = 'SDR'
2501             if (info_dict.get('duration') and format.get('tbr')
2502                     and not format.get('filesize') and not format.get('filesize_approx')):
2503                 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
2504
2505             # Add HTTP headers, so that external programs can use them from the
2506             # json output
2507             full_format_info = info_dict.copy()
2508             full_format_info.update(format)
2509             format['http_headers'] = self._calc_headers(full_format_info)
2510         # Remove private housekeeping stuff
2511         if '__x_forwarded_for_ip' in info_dict:
2512             del info_dict['__x_forwarded_for_ip']
2513
2514         if self.params.get('check_formats') is True:
2515             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2516
2517         if not formats or formats[0] is not info_dict:
2518             # only set the 'formats' fields if the original info_dict list them
2519             # otherwise we end up with a circular reference, the first (and unique)
2520             # element in the 'formats' field in info_dict is info_dict itself,
2521             # which can't be exported to json
2522             info_dict['formats'] = formats
2523
2524         info_dict, _ = self.pre_process(info_dict)
2525
2526         if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2527             return info_dict
2528
2529         self.post_extract(info_dict)
2530         info_dict, _ = self.pre_process(info_dict, 'after_filter')
2531
2532         # The pre-processors may have modified the formats
2533         formats = info_dict.get('formats', [info_dict])
2534
2535         list_only = self.params.get('simulate') is None and (
2536             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2537         interactive_format_selection = not list_only and self.format_selector == '-'
2538         if self.params.get('list_thumbnails'):
2539             self.list_thumbnails(info_dict)
2540         if self.params.get('listsubtitles'):
2541             if 'automatic_captions' in info_dict:
2542                 self.list_subtitles(
2543                     info_dict['id'], automatic_captions, 'automatic captions')
2544             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2545         if self.params.get('listformats') or interactive_format_selection:
2546             self.list_formats(info_dict)
2547         if list_only:
2548             # Without this printing, -F --print-json will not work
2549             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2550             return info_dict
2551
2552         format_selector = self.format_selector
2553         if format_selector is None:
2554             req_format = self._default_format_spec(info_dict, download=download)
2555             self.write_debug('Default format spec: %s' % req_format)
2556             format_selector = self.build_format_selector(req_format)
2557
2558         while True:
2559             if interactive_format_selection:
2560                 req_format = input(
2561                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2562                 try:
2563                     format_selector = self.build_format_selector(req_format)
2564                 except SyntaxError as err:
2565                     self.report_error(err, tb=False, is_error=False)
2566                     continue
2567
2568             formats_to_download = list(format_selector({
2569                 'formats': formats,
2570                 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2571                 'incomplete_formats': (
2572                     # All formats are video-only or
2573                     all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2574                     # all formats are audio-only
2575                     or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2576             }))
2577             if interactive_format_selection and not formats_to_download:
2578                 self.report_error('Requested format is not available', tb=False, is_error=False)
2579                 continue
2580             break
2581
2582         if not formats_to_download:
2583             if not self.params.get('ignore_no_formats_error'):
2584                 raise ExtractorError(
2585                     'Requested format is not available. Use --list-formats for a list of available formats',
2586                     expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2587             self.report_warning('Requested format is not available')
2588             # Process what we can, even without any available formats.
2589             formats_to_download = [{}]
2590
2591         requested_ranges = self.params.get('download_ranges')
2592         if requested_ranges:
2593             requested_ranges = tuple(requested_ranges(info_dict, self))
2594
2595         best_format, downloaded_formats = formats_to_download[-1], []
2596         if download:
2597             if best_format:
2598                 def to_screen(*msg):
2599                     self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2600
2601                 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2602                           (f['format_id'] for f in formats_to_download))
2603                 if requested_ranges:
2604                     to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2605                               (f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))
2606             max_downloads_reached = False
2607
2608             for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):
2609                 new_info = self._copy_infodict(info_dict)
2610                 new_info.update(fmt)
2611                 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2612                 if chapter or offset:
2613                     new_info.update({
2614                         'section_start': offset + chapter.get('start_time', 0),
2615                         'section_end': offset + min(chapter.get('end_time', duration), duration),
2616                         'section_title': chapter.get('title'),
2617                         'section_number': chapter.get('index'),
2618                     })
2619                 downloaded_formats.append(new_info)
2620                 try:
2621                     self.process_info(new_info)
2622                 except MaxDownloadsReached:
2623                     max_downloads_reached = True
2624                 self._raise_pending_errors(new_info)
2625                 # Remove copied info
2626                 for key, val in tuple(new_info.items()):
2627                     if info_dict.get(key) == val:
2628                         new_info.pop(key)
2629                 if max_downloads_reached:
2630                     break
2631
2632             write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2633             assert write_archive.issubset({True, False, 'ignore'})
2634             if True in write_archive and False not in write_archive:
2635                 self.record_download_archive(info_dict)
2636
2637             info_dict['requested_downloads'] = downloaded_formats
2638             info_dict = self.run_all_pps('after_video', info_dict)
2639             if max_downloads_reached:
2640                 raise MaxDownloadsReached()
2641
2642         # We update the info dict with the selected best quality format (backwards compatibility)
2643         info_dict.update(best_format)
2644         return info_dict
2645
2646     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2647         """Select the requested subtitles and their format"""
2648         available_subs, normal_sub_langs = {}, []
2649         if normal_subtitles and self.params.get('writesubtitles'):
2650             available_subs.update(normal_subtitles)
2651             normal_sub_langs = tuple(normal_subtitles.keys())
2652         if automatic_captions and self.params.get('writeautomaticsub'):
2653             for lang, cap_info in automatic_captions.items():
2654                 if lang not in available_subs:
2655                     available_subs[lang] = cap_info
2656
2657         if (not self.params.get('writesubtitles') and not
2658                 self.params.get('writeautomaticsub') or not
2659                 available_subs):
2660             return None
2661
2662         all_sub_langs = tuple(available_subs.keys())
2663         if self.params.get('allsubtitles', False):
2664             requested_langs = all_sub_langs
2665         elif self.params.get('subtitleslangs', False):
2666             # A list is used so that the order of languages will be the same as
2667             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2668             requested_langs = []
2669             for lang_re in self.params.get('subtitleslangs'):
2670                 discard = lang_re[0] == '-'
2671                 if discard:
2672                     lang_re = lang_re[1:]
2673                 if lang_re == 'all':
2674                     if discard:
2675                         requested_langs = []
2676                     else:
2677                         requested_langs.extend(all_sub_langs)
2678                     continue
2679                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2680                 if discard:
2681                     for lang in current_langs:
2682                         while lang in requested_langs:
2683                             requested_langs.remove(lang)
2684                 else:
2685                     requested_langs.extend(current_langs)
2686             requested_langs = orderedSet(requested_langs)
2687         elif normal_sub_langs:
2688             requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
2689         else:
2690             requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
2691         if requested_langs:
2692             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2693
2694         formats_query = self.params.get('subtitlesformat', 'best')
2695         formats_preference = formats_query.split('/') if formats_query else []
2696         subs = {}
2697         for lang in requested_langs:
2698             formats = available_subs.get(lang)
2699             if formats is None:
2700                 self.report_warning(f'{lang} subtitles not available for {video_id}')
2701                 continue
2702             for ext in formats_preference:
2703                 if ext == 'best':
2704                     f = formats[-1]
2705                     break
2706                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2707                 if matches:
2708                     f = matches[-1]
2709                     break
2710             else:
2711                 f = formats[-1]
2712                 self.report_warning(
2713                     'No subtitle format found matching "%s" for language %s, '
2714                     'using %s' % (formats_query, lang, f['ext']))
2715             subs[lang] = f
2716         return subs
2717
2718     def _forceprint(self, key, info_dict):
2719         if info_dict is None:
2720             return
2721         info_copy = info_dict.copy()
2722         info_copy['formats_table'] = self.render_formats_table(info_dict)
2723         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2724         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2725         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2726
2727         def format_tmpl(tmpl):
2728             mobj = re.match(r'\w+(=?)$', tmpl)
2729             if mobj and mobj.group(1):
2730                 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2731             elif mobj:
2732                 return f'%({tmpl})s'
2733             return tmpl
2734
2735         for tmpl in self.params['forceprint'].get(key, []):
2736             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2737
2738         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2739             filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
2740             tmpl = format_tmpl(tmpl)
2741             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2742             if self._ensure_dir_exists(filename):
2743                 with open(filename, 'a', encoding='utf-8') as f:
2744                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2745
2746     def __forced_printings(self, info_dict, filename, incomplete):
2747         def print_mandatory(field, actual_field=None):
2748             if actual_field is None:
2749                 actual_field = field
2750             if (self.params.get('force%s' % field, False)
2751                     and (not incomplete or info_dict.get(actual_field) is not None)):
2752                 self.to_stdout(info_dict[actual_field])
2753
2754         def print_optional(field):
2755             if (self.params.get('force%s' % field, False)
2756                     and info_dict.get(field) is not None):
2757                 self.to_stdout(info_dict[field])
2758
2759         info_dict = info_dict.copy()
2760         if filename is not None:
2761             info_dict['filename'] = filename
2762         if info_dict.get('requested_formats') is not None:
2763             # For RTMP URLs, also include the playpath
2764             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2765         elif info_dict.get('url'):
2766             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2767
2768         if (self.params.get('forcejson')
2769                 or self.params['forceprint'].get('video')
2770                 or self.params['print_to_file'].get('video')):
2771             self.post_extract(info_dict)
2772         self._forceprint('video', info_dict)
2773
2774         print_mandatory('title')
2775         print_mandatory('id')
2776         print_mandatory('url', 'urls')
2777         print_optional('thumbnail')
2778         print_optional('description')
2779         print_optional('filename')
2780         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2781             self.to_stdout(formatSeconds(info_dict['duration']))
2782         print_mandatory('format')
2783
2784         if self.params.get('forcejson'):
2785             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2786
2787     def dl(self, name, info, subtitle=False, test=False):
2788         if not info.get('url'):
2789             self.raise_no_formats(info, True)
2790
2791         if test:
2792             verbose = self.params.get('verbose')
2793             params = {
2794                 'test': True,
2795                 'quiet': self.params.get('quiet') or not verbose,
2796                 'verbose': verbose,
2797                 'noprogress': not verbose,
2798                 'nopart': True,
2799                 'skip_unavailable_fragments': False,
2800                 'keep_fragments': False,
2801                 'overwrites': True,
2802                 '_no_ytdl_file': True,
2803             }
2804         else:
2805             params = self.params
2806         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2807         if not test:
2808             for ph in self._progress_hooks:
2809                 fd.add_progress_hook(ph)
2810             urls = '", "'.join(
2811                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2812                 for f in info.get('requested_formats', []) or [info])
2813             self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
2814
2815         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2816         # But it may contain objects that are not deep-copyable
2817         new_info = self._copy_infodict(info)
2818         if new_info.get('http_headers') is None:
2819             new_info['http_headers'] = self._calc_headers(new_info)
2820         return fd.download(name, new_info, subtitle)
2821
2822     def existing_file(self, filepaths, *, default_overwrite=True):
2823         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2824         if existing_files and not self.params.get('overwrites', default_overwrite):
2825             return existing_files[0]
2826
2827         for file in existing_files:
2828             self.report_file_delete(file)
2829             os.remove(file)
2830         return None
2831
2832     def process_info(self, info_dict):
2833         """Process a single resolved IE result. (Modifies it in-place)"""
2834
2835         assert info_dict.get('_type', 'video') == 'video'
2836         original_infodict = info_dict
2837
2838         if 'format' not in info_dict and 'ext' in info_dict:
2839             info_dict['format'] = info_dict['ext']
2840
2841         # This is mostly just for backward compatibility of process_info
2842         # As a side-effect, this allows for format-specific filters
2843         if self._match_entry(info_dict) is not None:
2844             info_dict['__write_download_archive'] = 'ignore'
2845             return
2846
2847         # Does nothing under normal operation - for backward compatibility of process_info
2848         self.post_extract(info_dict)
2849         self._num_downloads += 1
2850
2851         # info_dict['_filename'] needs to be set for backward compatibility
2852         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2853         temp_filename = self.prepare_filename(info_dict, 'temp')
2854         files_to_move = {}
2855
2856         # Forced printings
2857         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2858
2859         def check_max_downloads():
2860             if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
2861                 raise MaxDownloadsReached()
2862
2863         if self.params.get('simulate'):
2864             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2865             check_max_downloads()
2866             return
2867
2868         if full_filename is None:
2869             return
2870         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2871             return
2872         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2873             return
2874
2875         if self._write_description('video', info_dict,
2876                                    self.prepare_filename(info_dict, 'description')) is None:
2877             return
2878
2879         sub_files = self._write_subtitles(info_dict, temp_filename)
2880         if sub_files is None:
2881             return
2882         files_to_move.update(dict(sub_files))
2883
2884         thumb_files = self._write_thumbnails(
2885             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2886         if thumb_files is None:
2887             return
2888         files_to_move.update(dict(thumb_files))
2889
2890         infofn = self.prepare_filename(info_dict, 'infojson')
2891         _infojson_written = self._write_info_json('video', info_dict, infofn)
2892         if _infojson_written:
2893             info_dict['infojson_filename'] = infofn
2894             # For backward compatibility, even though it was a private field
2895             info_dict['__infojson_filename'] = infofn
2896         elif _infojson_written is None:
2897             return
2898
2899         # Note: Annotations are deprecated
2900         annofn = None
2901         if self.params.get('writeannotations', False):
2902             annofn = self.prepare_filename(info_dict, 'annotation')
2903         if annofn:
2904             if not self._ensure_dir_exists(encodeFilename(annofn)):
2905                 return
2906             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2907                 self.to_screen('[info] Video annotations are already present')
2908             elif not info_dict.get('annotations'):
2909                 self.report_warning('There are no annotations to write.')
2910             else:
2911                 try:
2912                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2913                     with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2914                         annofile.write(info_dict['annotations'])
2915                 except (KeyError, TypeError):
2916                     self.report_warning('There are no annotations to write.')
2917                 except OSError:
2918                     self.report_error('Cannot write annotations file: ' + annofn)
2919                     return
2920
2921         # Write internet shortcut files
2922         def _write_link_file(link_type):
2923             url = try_get(info_dict['webpage_url'], iri_to_uri)
2924             if not url:
2925                 self.report_warning(
2926                     f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2927                 return True
2928             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2929             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2930                 return False
2931             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2932                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2933                 return True
2934             try:
2935                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2936                 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2937                           newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2938                     template_vars = {'url': url}
2939                     if link_type == 'desktop':
2940                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2941                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2942             except OSError:
2943                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2944                 return False
2945             return True
2946
2947         write_links = {
2948             'url': self.params.get('writeurllink'),
2949             'webloc': self.params.get('writewebloclink'),
2950             'desktop': self.params.get('writedesktoplink'),
2951         }
2952         if self.params.get('writelink'):
2953             link_type = ('webloc' if sys.platform == 'darwin'
2954                          else 'desktop' if sys.platform.startswith('linux')
2955                          else 'url')
2956             write_links[link_type] = True
2957
2958         if any(should_write and not _write_link_file(link_type)
2959                for link_type, should_write in write_links.items()):
2960             return
2961
2962         def replace_info_dict(new_info):
2963             nonlocal info_dict
2964             if new_info == info_dict:
2965                 return
2966             info_dict.clear()
2967             info_dict.update(new_info)
2968
2969         new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2970         replace_info_dict(new_info)
2971
2972         if self.params.get('skip_download'):
2973             info_dict['filepath'] = temp_filename
2974             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2975             info_dict['__files_to_move'] = files_to_move
2976             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2977             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2978         else:
2979             # Download
2980             info_dict.setdefault('__postprocessors', [])
2981             try:
2982
2983                 def existing_video_file(*filepaths):
2984                     ext = info_dict.get('ext')
2985                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
2986                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
2987                                               default_overwrite=False)
2988                     if file:
2989                         info_dict['ext'] = os.path.splitext(file)[1][1:]
2990                     return file
2991
2992                 fd, success = None, True
2993                 if info_dict.get('protocol') or info_dict.get('url'):
2994                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
2995                     if fd is not FFmpegFD and (
2996                             info_dict.get('section_start') or info_dict.get('section_end')):
2997                         msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
2998                                else 'You have requested downloading the video partially, but ffmpeg is not installed')
2999                         self.report_error(f'{msg}. Aborting')
3000                         return
3001
3002                 if info_dict.get('requested_formats') is not None:
3003
3004                     def compatible_formats(formats):
3005                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3006                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
3007                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
3008                         if len(video_formats) > 2 or len(audio_formats) > 2:
3009                             return False
3010
3011                         # Check extension
3012                         exts = {format.get('ext') for format in formats}
3013                         COMPATIBLE_EXTS = (
3014                             {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
3015                             {'webm'},
3016                         )
3017                         for ext_sets in COMPATIBLE_EXTS:
3018                             if ext_sets.issuperset(exts):
3019                                 return True
3020                         # TODO: Check acodec/vcodec
3021                         return False
3022
3023                     requested_formats = info_dict['requested_formats']
3024                     old_ext = info_dict['ext']
3025                     if self.params.get('merge_output_format') is None:
3026                         if not compatible_formats(requested_formats):
3027                             info_dict['ext'] = 'mkv'
3028                             self.report_warning(
3029                                 'Requested formats are incompatible for merge and will be merged into mkv')
3030                         if (info_dict['ext'] == 'webm'
3031                                 and info_dict.get('thumbnails')
3032                                 # check with type instead of pp_key, __name__, or isinstance
3033                                 # since we dont want any custom PPs to trigger this
3034                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
3035                             info_dict['ext'] = 'mkv'
3036                             self.report_warning(
3037                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3038                     new_ext = info_dict['ext']
3039
3040                     def correct_ext(filename, ext=new_ext):
3041                         if filename == '-':
3042                             return filename
3043                         filename_real_ext = os.path.splitext(filename)[1][1:]
3044                         filename_wo_ext = (
3045                             os.path.splitext(filename)[0]
3046                             if filename_real_ext in (old_ext, new_ext)
3047                             else filename)
3048                         return f'{filename_wo_ext}.{ext}'
3049
3050                     # Ensure filename always has a correct extension for successful merge
3051                     full_filename = correct_ext(full_filename)
3052                     temp_filename = correct_ext(temp_filename)
3053                     dl_filename = existing_video_file(full_filename, temp_filename)
3054                     info_dict['__real_download'] = False
3055
3056                     merger = FFmpegMergerPP(self)
3057                     downloaded = []
3058                     if dl_filename is not None:
3059                         self.report_file_already_downloaded(dl_filename)
3060                     elif fd:
3061                         for f in requested_formats if fd != FFmpegFD else []:
3062                             f['filepath'] = fname = prepend_extension(
3063                                 correct_ext(temp_filename, info_dict['ext']),
3064                                 'f%s' % f['format_id'], info_dict['ext'])
3065                             downloaded.append(fname)
3066                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3067                         success, real_download = self.dl(temp_filename, info_dict)
3068                         info_dict['__real_download'] = real_download
3069                     else:
3070                         if self.params.get('allow_unplayable_formats'):
3071                             self.report_warning(
3072                                 'You have requested merging of multiple formats '
3073                                 'while also allowing unplayable formats to be downloaded. '
3074                                 'The formats won\'t be merged to prevent data corruption.')
3075                         elif not merger.available:
3076                             msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3077                             if not self.params.get('ignoreerrors'):
3078                                 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3079                                 return
3080                             self.report_warning(f'{msg}. The formats won\'t be merged')
3081
3082                         if temp_filename == '-':
3083                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3084                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3085                                       else 'but ffmpeg is not installed')
3086                             self.report_warning(
3087                                 f'You have requested downloading multiple formats to stdout {reason}. '
3088                                 'The formats will be streamed one after the other')
3089                             fname = temp_filename
3090                         for f in requested_formats:
3091                             new_info = dict(info_dict)
3092                             del new_info['requested_formats']
3093                             new_info.update(f)
3094                             if temp_filename != '-':
3095                                 fname = prepend_extension(
3096                                     correct_ext(temp_filename, new_info['ext']),
3097                                     'f%s' % f['format_id'], new_info['ext'])
3098                                 if not self._ensure_dir_exists(fname):
3099                                     return
3100                                 f['filepath'] = fname
3101                                 downloaded.append(fname)
3102                             partial_success, real_download = self.dl(fname, new_info)
3103                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3104                             success = success and partial_success
3105
3106                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3107                         info_dict['__postprocessors'].append(merger)
3108                         info_dict['__files_to_merge'] = downloaded
3109                         # Even if there were no downloads, it is being merged only now
3110                         info_dict['__real_download'] = True
3111                     else:
3112                         for file in downloaded:
3113                             files_to_move[file] = None
3114                 else:
3115                     # Just a single file
3116                     dl_filename = existing_video_file(full_filename, temp_filename)
3117                     if dl_filename is None or dl_filename == temp_filename:
3118                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3119                         # So we should try to resume the download
3120                         success, real_download = self.dl(temp_filename, info_dict)
3121                         info_dict['__real_download'] = real_download
3122                     else:
3123                         self.report_file_already_downloaded(dl_filename)
3124
3125                 dl_filename = dl_filename or temp_filename
3126                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3127
3128             except network_exceptions as err:
3129                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3130                 return
3131             except OSError as err:
3132                 raise UnavailableVideoError(err)
3133             except (ContentTooShortError, ) as err:
3134                 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3135                 return
3136
3137             self._raise_pending_errors(info_dict)
3138             if success and full_filename != '-':
3139
3140                 def fixup():
3141                     do_fixup = True
3142                     fixup_policy = self.params.get('fixup')
3143                     vid = info_dict['id']
3144
3145                     if fixup_policy in ('ignore', 'never'):
3146                         return
3147                     elif fixup_policy == 'warn':
3148                         do_fixup = 'warn'
3149                     elif fixup_policy != 'force':
3150                         assert fixup_policy in ('detect_or_warn', None)
3151                         if not info_dict.get('__real_download'):
3152                             do_fixup = False
3153
3154                     def ffmpeg_fixup(cndn, msg, cls):
3155                         if not (do_fixup and cndn):
3156                             return
3157                         elif do_fixup == 'warn':
3158                             self.report_warning(f'{vid}: {msg}')
3159                             return
3160                         pp = cls(self)
3161                         if pp.available:
3162                             info_dict['__postprocessors'].append(pp)
3163                         else:
3164                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3165
3166                     stretched_ratio = info_dict.get('stretched_ratio')
3167                     ffmpeg_fixup(
3168                         stretched_ratio not in (1, None),
3169                         f'Non-uniform pixel ratio {stretched_ratio}',
3170                         FFmpegFixupStretchedPP)
3171
3172                     ffmpeg_fixup(
3173                         (info_dict.get('requested_formats') is None
3174                          and info_dict.get('container') == 'm4a_dash'
3175                          and info_dict.get('ext') == 'm4a'),
3176                         'writing DASH m4a. Only some players support this container',
3177                         FFmpegFixupM4aPP)
3178
3179                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3180                     downloader = downloader.FD_NAME if downloader else None
3181
3182                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3183                         ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3184                                      or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3185                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3186                                      FFmpegFixupM3u8PP)
3187                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3188                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3189
3190                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3191                     ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3192
3193                 fixup()
3194                 try:
3195                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3196                 except PostProcessingError as err:
3197                     self.report_error('Postprocessing: %s' % str(err))
3198                     return
3199                 try:
3200                     for ph in self._post_hooks:
3201                         ph(info_dict['filepath'])
3202                 except Exception as err:
3203                     self.report_error('post hooks: %s' % str(err))
3204                     return
3205                 info_dict['__write_download_archive'] = True
3206
3207         assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
3208         if self.params.get('force_write_download_archive'):
3209             info_dict['__write_download_archive'] = True
3210         check_max_downloads()
3211
3212     def __download_wrapper(self, func):
3213         @functools.wraps(func)
3214         def wrapper(*args, **kwargs):
3215             try:
3216                 res = func(*args, **kwargs)
3217             except UnavailableVideoError as e:
3218                 self.report_error(e)
3219             except DownloadCancelled as e:
3220                 self.to_screen(f'[info] {e}')
3221                 if not self.params.get('break_per_url'):
3222                     raise
3223             else:
3224                 if self.params.get('dump_single_json', False):
3225                     self.post_extract(res)
3226                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3227         return wrapper
3228
3229     def download(self, url_list):
3230         """Download a given list of URLs."""
3231         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3232         outtmpl = self.params['outtmpl']['default']
3233         if (len(url_list) > 1
3234                 and outtmpl != '-'
3235                 and '%' not in outtmpl
3236                 and self.params.get('max_downloads') != 1):
3237             raise SameFileError(outtmpl)
3238
3239         for url in url_list:
3240             self.__download_wrapper(self.extract_info)(
3241                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3242
3243         return self._download_retcode
3244
3245     def download_with_info_file(self, info_filename):
3246         with contextlib.closing(fileinput.FileInput(
3247                 [info_filename], mode='r',
3248                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3249             # FileInput doesn't have a read method, we can't call json.load
3250             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3251         try:
3252             self.__download_wrapper(self.process_ie_result)(info, download=True)
3253         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3254             if not isinstance(e, EntryNotInPlaylist):
3255                 self.to_stderr('\r')
3256             webpage_url = info.get('webpage_url')
3257             if webpage_url is not None:
3258                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3259                 return self.download([webpage_url])
3260             else:
3261                 raise
3262         return self._download_retcode
3263
3264     @staticmethod
3265     def sanitize_info(info_dict, remove_private_keys=False):
3266         ''' Sanitize the infodict for converting to json '''
3267         if info_dict is None:
3268             return info_dict
3269         info_dict.setdefault('epoch', int(time.time()))
3270         info_dict.setdefault('_type', 'video')
3271
3272         if remove_private_keys:
3273             reject = lambda k, v: v is None or k.startswith('__') or k in {
3274                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3275                 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
3276             }
3277         else:
3278             reject = lambda k, v: False
3279
3280         def filter_fn(obj):
3281             if isinstance(obj, dict):
3282                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3283             elif isinstance(obj, (list, tuple, set, LazyList)):
3284                 return list(map(filter_fn, obj))
3285             elif obj is None or isinstance(obj, (str, int, float, bool)):
3286                 return obj
3287             else:
3288                 return repr(obj)
3289
3290         return filter_fn(info_dict)
3291
3292     @staticmethod
3293     def filter_requested_info(info_dict, actually_filter=True):
3294         ''' Alias of sanitize_info for backward compatibility '''
3295         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3296
3297     def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3298         for filename in set(filter(None, files_to_delete)):
3299             if msg:
3300                 self.to_screen(msg % filename)
3301             try:
3302                 os.remove(filename)
3303             except OSError:
3304                 self.report_warning(f'Unable to delete file {filename}')
3305             if filename in info.get('__files_to_move', []):  # NB: Delete even if None
3306                 del info['__files_to_move'][filename]
3307
3308     @staticmethod
3309     def post_extract(info_dict):
3310         def actual_post_extract(info_dict):
3311             if info_dict.get('_type') in ('playlist', 'multi_video'):
3312                 for video_dict in info_dict.get('entries', {}):
3313                     actual_post_extract(video_dict or {})
3314                 return
3315
3316             post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3317             info_dict.update(post_extractor())
3318
3319         actual_post_extract(info_dict or {})
3320
3321     def run_pp(self, pp, infodict):
3322         files_to_delete = []
3323         if '__files_to_move' not in infodict:
3324             infodict['__files_to_move'] = {}
3325         try:
3326             files_to_delete, infodict = pp.run(infodict)
3327         except PostProcessingError as e:
3328             # Must be True and not 'only_download'
3329             if self.params.get('ignoreerrors') is True:
3330                 self.report_error(e)
3331                 return infodict
3332             raise
3333
3334         if not files_to_delete:
3335             return infodict
3336         if self.params.get('keepvideo', False):
3337             for f in files_to_delete:
3338                 infodict['__files_to_move'].setdefault(f, '')
3339         else:
3340             self._delete_downloaded_files(
3341                 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3342         return infodict
3343
3344     def run_all_pps(self, key, info, *, additional_pps=None):
3345         self._forceprint(key, info)
3346         for pp in (additional_pps or []) + self._pps[key]:
3347             info = self.run_pp(pp, info)
3348         return info
3349
3350     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3351         info = dict(ie_info)
3352         info['__files_to_move'] = files_to_move or {}
3353         try:
3354             info = self.run_all_pps(key, info)
3355         except PostProcessingError as err:
3356             msg = f'Preprocessing: {err}'
3357             info.setdefault('__pending_error', msg)
3358             self.report_error(msg, is_error=False)
3359         return info, info.pop('__files_to_move', None)
3360
3361     def post_process(self, filename, info, files_to_move=None):
3362         """Run all the postprocessors on the given file."""
3363         info['filepath'] = filename
3364         info['__files_to_move'] = files_to_move or {}
3365         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3366         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3367         del info['__files_to_move']
3368         return self.run_all_pps('after_move', info)
3369
3370     def _make_archive_id(self, info_dict):
3371         video_id = info_dict.get('id')
3372         if not video_id:
3373             return
3374         # Future-proof against any change in case
3375         # and backwards compatibility with prior versions
3376         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3377         if extractor is None:
3378             url = str_or_none(info_dict.get('url'))
3379             if not url:
3380                 return
3381             # Try to find matching extractor for the URL and take its ie_key
3382             for ie_key, ie in self._ies.items():
3383                 if ie.suitable(url):
3384                     extractor = ie_key
3385                     break
3386             else:
3387                 return
3388         return f'{extractor.lower()} {video_id}'
3389
3390     def in_download_archive(self, info_dict):
3391         fn = self.params.get('download_archive')
3392         if fn is None:
3393             return False
3394
3395         vid_id = self._make_archive_id(info_dict)
3396         if not vid_id:
3397             return False  # Incomplete video information
3398
3399         return vid_id in self.archive
3400
3401     def record_download_archive(self, info_dict):
3402         fn = self.params.get('download_archive')
3403         if fn is None:
3404             return
3405         vid_id = self._make_archive_id(info_dict)
3406         assert vid_id
3407         self.write_debug(f'Adding to archive: {vid_id}')
3408         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3409             archive_file.write(vid_id + '\n')
3410         self.archive.add(vid_id)
3411
3412     @staticmethod
3413     def format_resolution(format, default='unknown'):
3414         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3415             return 'audio only'
3416         if format.get('resolution') is not None:
3417             return format['resolution']
3418         if format.get('width') and format.get('height'):
3419             return '%dx%d' % (format['width'], format['height'])
3420         elif format.get('height'):
3421             return '%sp' % format['height']
3422         elif format.get('width'):
3423             return '%dx?' % format['width']
3424         return default
3425
3426     def _list_format_headers(self, *headers):
3427         if self.params.get('listformats_table', True) is not False:
3428             return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3429         return headers
3430
3431     def _format_note(self, fdict):
3432         res = ''
3433         if fdict.get('ext') in ['f4f', 'f4m']:
3434             res += '(unsupported)'
3435         if fdict.get('language'):
3436             if res:
3437                 res += ' '
3438             res += '[%s]' % fdict['language']
3439         if fdict.get('format_note') is not None:
3440             if res:
3441                 res += ' '
3442             res += fdict['format_note']
3443         if fdict.get('tbr') is not None:
3444             if res:
3445                 res += ', '
3446             res += '%4dk' % fdict['tbr']
3447         if fdict.get('container') is not None:
3448             if res:
3449                 res += ', '
3450             res += '%s container' % fdict['container']
3451         if (fdict.get('vcodec') is not None
3452                 and fdict.get('vcodec') != 'none'):
3453             if res:
3454                 res += ', '
3455             res += fdict['vcodec']
3456             if fdict.get('vbr') is not None:
3457                 res += '@'
3458         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3459             res += 'video@'
3460         if fdict.get('vbr') is not None:
3461             res += '%4dk' % fdict['vbr']
3462         if fdict.get('fps') is not None:
3463             if res:
3464                 res += ', '
3465             res += '%sfps' % fdict['fps']
3466         if fdict.get('acodec') is not None:
3467             if res:
3468                 res += ', '
3469             if fdict['acodec'] == 'none':
3470                 res += 'video only'
3471             else:
3472                 res += '%-5s' % fdict['acodec']
3473         elif fdict.get('abr') is not None:
3474             if res:
3475                 res += ', '
3476             res += 'audio'
3477         if fdict.get('abr') is not None:
3478             res += '@%3dk' % fdict['abr']
3479         if fdict.get('asr') is not None:
3480             res += ' (%5dHz)' % fdict['asr']
3481         if fdict.get('filesize') is not None:
3482             if res:
3483                 res += ', '
3484             res += format_bytes(fdict['filesize'])
3485         elif fdict.get('filesize_approx') is not None:
3486             if res:
3487                 res += ', '
3488             res += '~' + format_bytes(fdict['filesize_approx'])
3489         return res
3490
3491     def render_formats_table(self, info_dict):
3492         if not info_dict.get('formats') and not info_dict.get('url'):
3493             return None
3494
3495         formats = info_dict.get('formats', [info_dict])
3496         if not self.params.get('listformats_table', True) is not False:
3497             table = [
3498                 [
3499                     format_field(f, 'format_id'),
3500                     format_field(f, 'ext'),
3501                     self.format_resolution(f),
3502                     self._format_note(f)
3503                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3504             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3505
3506         delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3507         table = [
3508             [
3509                 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3510                 format_field(f, 'ext'),
3511                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3512                 format_field(f, 'fps', '\t%d'),
3513                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3514                 delim,
3515                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3516                 format_field(f, 'tbr', '\t%dk'),
3517                 shorten_protocol_name(f.get('protocol', '')),
3518                 delim,
3519                 format_field(f, 'vcodec', default='unknown').replace(
3520                     'none', 'images' if f.get('acodec') == 'none'
3521                             else self._format_out('audio only', self.Styles.SUPPRESS)),
3522                 format_field(f, 'vbr', '\t%dk'),
3523                 format_field(f, 'acodec', default='unknown').replace(
3524                     'none', '' if f.get('vcodec') == 'none'
3525                             else self._format_out('video only', self.Styles.SUPPRESS)),
3526                 format_field(f, 'abr', '\t%dk'),
3527                 format_field(f, 'asr', '\t%dHz'),
3528                 join_nonempty(
3529                     self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3530                     format_field(f, 'language', '[%s]'),
3531                     join_nonempty(format_field(f, 'format_note'),
3532                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3533                                   delim=', '),
3534                     delim=' '),
3535             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3536         header_line = self._list_format_headers(
3537             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3538             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3539
3540         return render_table(
3541             header_line, table, hide_empty=True,
3542             delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3543
3544     def render_thumbnails_table(self, info_dict):
3545         thumbnails = list(info_dict.get('thumbnails') or [])
3546         if not thumbnails:
3547             return None
3548         return render_table(
3549             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3550             [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3551
3552     def render_subtitles_table(self, video_id, subtitles):
3553         def _row(lang, formats):
3554             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3555             if len(set(names)) == 1:
3556                 names = [] if names[0] == 'unknown' else names[:1]
3557             return [lang, ', '.join(names), ', '.join(exts)]
3558
3559         if not subtitles:
3560             return None
3561         return render_table(
3562             self._list_format_headers('Language', 'Name', 'Formats'),
3563             [_row(lang, formats) for lang, formats in subtitles.items()],
3564             hide_empty=True)
3565
3566     def __list_table(self, video_id, name, func, *args):
3567         table = func(*args)
3568         if not table:
3569             self.to_screen(f'{video_id} has no {name}')
3570             return
3571         self.to_screen(f'[info] Available {name} for {video_id}:')
3572         self.to_stdout(table)
3573
3574     def list_formats(self, info_dict):
3575         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3576
3577     def list_thumbnails(self, info_dict):
3578         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3579
3580     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3581         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3582
3583     def urlopen(self, req):
3584         """ Start an HTTP download """
3585         if isinstance(req, str):
3586             req = sanitized_Request(req)
3587         return self._opener.open(req, timeout=self._socket_timeout)
3588
3589     def print_debug_header(self):
3590         if not self.params.get('verbose'):
3591             return
3592
3593         # These imports can be slow. So import them only as needed
3594         from .extractor.extractors import _LAZY_LOADER
3595         from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
3596
3597         def get_encoding(stream):
3598             ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3599             if not supports_terminal_sequences(stream):
3600                 from .utils import WINDOWS_VT_MODE  # Must be imported locally
3601                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3602             return ret
3603
3604         encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
3605             locale.getpreferredencoding(),
3606             sys.getfilesystemencoding(),
3607             self.get_encoding(),
3608             ', '.join(
3609                 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3610                 if stream is not None and key != 'console')
3611         )
3612
3613         logger = self.params.get('logger')
3614         if logger:
3615             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3616             write_debug(encoding_str)
3617         else:
3618             write_string(f'[debug] {encoding_str}\n', encoding=None)
3619             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3620
3621         source = detect_variant()
3622         write_debug(join_nonempty(
3623             'yt-dlp version', __version__,
3624             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3625             '' if source == 'unknown' else f'({source})',
3626             delim=' '))
3627         if not _LAZY_LOADER:
3628             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3629                 write_debug('Lazy loading extractors is forcibly disabled')
3630             else:
3631                 write_debug('Lazy loading extractors is disabled')
3632         if plugin_extractors or plugin_postprocessors:
3633             write_debug('Plugins: %s' % [
3634                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3635                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3636         if self.params['compat_opts']:
3637             write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
3638
3639         if source == 'source':
3640             try:
3641                 stdout, _, _ = Popen.run(
3642                     ['git', 'rev-parse', '--short', 'HEAD'],
3643                     text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
3644                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
3645                 if re.fullmatch('[0-9a-f]+', stdout.strip()):
3646                     write_debug(f'Git HEAD: {stdout.strip()}')
3647             except Exception:
3648                 with contextlib.suppress(Exception):
3649                     sys.exc_clear()
3650
3651         def python_implementation():
3652             impl_name = platform.python_implementation()
3653             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3654                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3655             return impl_name
3656
3657         write_debug('Python version %s (%s %s) - %s' % (
3658             platform.python_version(),
3659             python_implementation(),
3660             platform.architecture()[0],
3661             platform_name()))
3662
3663         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3664         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3665         if ffmpeg_features:
3666             exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3667
3668         exe_versions['rtmpdump'] = rtmpdump_version()
3669         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3670         exe_str = ', '.join(
3671             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3672         ) or 'none'
3673         write_debug('exe versions: %s' % exe_str)
3674
3675         from .compat.compat_utils import get_package_info
3676         from .dependencies import available_dependencies
3677
3678         write_debug('Optional libraries: %s' % (', '.join(sorted({
3679             join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
3680         })) or 'none'))
3681
3682         self._setup_opener()
3683         proxy_map = {}
3684         for handler in self._opener.handlers:
3685             if hasattr(handler, 'proxies'):
3686                 proxy_map.update(handler.proxies)
3687         write_debug(f'Proxy map: {proxy_map}')
3688
3689         # Not implemented
3690         if False and self.params.get('call_home'):
3691             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
3692             write_debug('Public IP address: %s' % ipaddr)
3693             latest_version = self.urlopen(
3694                 'https://yt-dl.org/latest/version').read().decode()
3695             if version_tuple(latest_version) > version_tuple(__version__):
3696                 self.report_warning(
3697                     'You are using an outdated version (newest version: %s)! '
3698                     'See https://yt-dl.org/update if you need help updating.' %
3699                     latest_version)
3700
3701     def _setup_opener(self):
3702         if hasattr(self, '_opener'):
3703             return
3704         timeout_val = self.params.get('socket_timeout')
3705         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3706
3707         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3708         opts_cookiefile = self.params.get('cookiefile')
3709         opts_proxy = self.params.get('proxy')
3710
3711         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3712
3713         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3714         if opts_proxy is not None:
3715             if opts_proxy == '':
3716                 proxies = {}
3717             else:
3718                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3719         else:
3720             proxies = urllib.request.getproxies()
3721             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3722             if 'http' in proxies and 'https' not in proxies:
3723                 proxies['https'] = proxies['http']
3724         proxy_handler = PerRequestProxyHandler(proxies)
3725
3726         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3727         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3728         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3729         redirect_handler = YoutubeDLRedirectHandler()
3730         data_handler = urllib.request.DataHandler()
3731
3732         # When passing our own FileHandler instance, build_opener won't add the
3733         # default FileHandler and allows us to disable the file protocol, which
3734         # can be used for malicious purposes (see
3735         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3736         file_handler = urllib.request.FileHandler()
3737
3738         def file_open(*args, **kwargs):
3739             raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3740         file_handler.file_open = file_open
3741
3742         opener = urllib.request.build_opener(
3743             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3744
3745         # Delete the default user-agent header, which would otherwise apply in
3746         # cases where our custom HTTP handler doesn't come into play
3747         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3748         opener.addheaders = []
3749         self._opener = opener
3750
3751     def encode(self, s):
3752         if isinstance(s, bytes):
3753             return s  # Already encoded
3754
3755         try:
3756             return s.encode(self.get_encoding())
3757         except UnicodeEncodeError as err:
3758             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3759             raise
3760
3761     def get_encoding(self):
3762         encoding = self.params.get('encoding')
3763         if encoding is None:
3764             encoding = preferredencoding()
3765         return encoding
3766
3767     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3768         ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
3769         if overwrite is None:
3770             overwrite = self.params.get('overwrites', True)
3771         if not self.params.get('writeinfojson'):
3772             return False
3773         elif not infofn:
3774             self.write_debug(f'Skipping writing {label} infojson')
3775             return False
3776         elif not self._ensure_dir_exists(infofn):
3777             return None
3778         elif not overwrite and os.path.exists(infofn):
3779             self.to_screen(f'[info] {label.title()} metadata is already present')
3780             return 'exists'
3781
3782         self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3783         try:
3784             write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3785             return True
3786         except OSError:
3787             self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3788             return None
3789
3790     def _write_description(self, label, ie_result, descfn):
3791         ''' Write description and returns True = written, False = skip, None = error '''
3792         if not self.params.get('writedescription'):
3793             return False
3794         elif not descfn:
3795             self.write_debug(f'Skipping writing {label} description')
3796             return False
3797         elif not self._ensure_dir_exists(descfn):
3798             return None
3799         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3800             self.to_screen(f'[info] {label.title()} description is already present')
3801         elif ie_result.get('description') is None:
3802             self.report_warning(f'There\'s no {label} description to write')
3803             return False
3804         else:
3805             try:
3806                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3807                 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3808                     descfile.write(ie_result['description'])
3809             except OSError:
3810                 self.report_error(f'Cannot write {label} description file {descfn}')
3811                 return None
3812         return True
3813
3814     def _write_subtitles(self, info_dict, filename):
3815         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3816         ret = []
3817         subtitles = info_dict.get('requested_subtitles')
3818         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3819             # subtitles download errors are already managed as troubles in relevant IE
3820             # that way it will silently go on when used with unsupporting IE
3821             return ret
3822
3823         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3824         if not sub_filename_base:
3825             self.to_screen('[info] Skipping writing video subtitles')
3826             return ret
3827         for sub_lang, sub_info in subtitles.items():
3828             sub_format = sub_info['ext']
3829             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3830             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3831             existing_sub = self.existing_file((sub_filename_final, sub_filename))
3832             if existing_sub:
3833                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3834                 sub_info['filepath'] = existing_sub
3835                 ret.append((existing_sub, sub_filename_final))
3836                 continue
3837
3838             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3839             if sub_info.get('data') is not None:
3840                 try:
3841                     # Use newline='' to prevent conversion of newline characters
3842                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3843                     with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3844                         subfile.write(sub_info['data'])
3845                     sub_info['filepath'] = sub_filename
3846                     ret.append((sub_filename, sub_filename_final))
3847                     continue
3848                 except OSError:
3849                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3850                     return None
3851
3852             try:
3853                 sub_copy = sub_info.copy()
3854                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3855                 self.dl(sub_filename, sub_copy, subtitle=True)
3856                 sub_info['filepath'] = sub_filename
3857                 ret.append((sub_filename, sub_filename_final))
3858             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3859                 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
3860                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3861                     if not self.params.get('ignoreerrors'):
3862                         self.report_error(msg)
3863                     raise DownloadError(msg)
3864                 self.report_warning(msg)
3865         return ret
3866
3867     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3868         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3869         write_all = self.params.get('write_all_thumbnails', False)
3870         thumbnails, ret = [], []
3871         if write_all or self.params.get('writethumbnail', False):
3872             thumbnails = info_dict.get('thumbnails') or []
3873         multiple = write_all and len(thumbnails) > 1
3874
3875         if thumb_filename_base is None:
3876             thumb_filename_base = filename
3877         if thumbnails and not thumb_filename_base:
3878             self.write_debug(f'Skipping writing {label} thumbnail')
3879             return ret
3880
3881         for idx, t in list(enumerate(thumbnails))[::-1]:
3882             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3883             thumb_display_id = f'{label} thumbnail {t["id"]}'
3884             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3885             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3886
3887             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3888             if existing_thumb:
3889                 self.to_screen('[info] %s is already present' % (
3890                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3891                 t['filepath'] = existing_thumb
3892                 ret.append((existing_thumb, thumb_filename_final))
3893             else:
3894                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3895                 try:
3896                     uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3897                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3898                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3899                         shutil.copyfileobj(uf, thumbf)
3900                     ret.append((thumb_filename, thumb_filename_final))
3901                     t['filepath'] = thumb_filename
3902                 except network_exceptions as err:
3903                     thumbnails.pop(idx)
3904                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3905             if ret and not write_all:
3906                 break
3907         return ret