yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_pycrypto_AES,
  40     compat_shlex_quote,
  41     compat_str,
  42     compat_tokenize_tokenize,
  43     compat_urllib_error,
  44     compat_urllib_request,
  45     compat_urllib_request_DataHandler,
  46     windows_enable_vt_mode,
  47 )
  48 from .cookies import load_cookies
  49 from .utils import (
  50     age_restricted,
  51     args_to_str,
  52     ContentTooShortError,
  53     date_from_str,
  54     DateRange,
  55     DEFAULT_OUTTMPL,
  56     determine_ext,
  57     determine_protocol,
  58     DownloadCancelled,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     format_decimal_suffix,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     get_domain,
  74     HEADRequest,
  75     int_or_none,
  76     iri_to_uri,
  77     ISO3166Utils,
  78     join_nonempty,
  79     LazyList,
  80     LINK_TEMPLATES,
  81     locked_file,
  82     make_dir,
  83     make_HTTPS_handler,
  84     MaxDownloadsReached,
  85     network_exceptions,
  86     number_of_digits,
  87     orderedSet,
  88     OUTTMPL_TYPES,
  89     PagedList,
  90     parse_filesize,
  91     PerRequestProxyHandler,
  92     platform_name,
  93     Popen,
  94     POSTPROCESS_WHEN,
  95     PostProcessingError,
  96     preferredencoding,
  97     prepend_extension,
  98     ReExtractInfo,
  99     register_socks_protocols,
 100     RejectedVideoReached,
 101     remove_terminal_sequences,
 102     render_table,
 103     replace_extension,
 104     SameFileError,
 105     sanitize_filename,
 106     sanitize_path,
 107     sanitize_url,
 108     sanitized_Request,
 109     std_headers,
 110     STR_FORMAT_RE_TMPL,
 111     STR_FORMAT_TYPES,
 112     str_or_none,
 113     strftime_or_none,
 114     subtitles_filename,
 115     supports_terminal_sequences,
 116     timetuple_from_msec,
 117     to_high_limit_path,
 118     traverse_obj,
 119     try_get,
 120     UnavailableVideoError,
 121     url_basename,
 122     variadic,
 123     version_tuple,
 124     write_json_file,
 125     write_string,
 126     YoutubeDLCookieProcessor,
 127     YoutubeDLHandler,
 128     YoutubeDLRedirectHandler,
 129 )
 130 from .cache import Cache
 131 from .minicurses import format_text
 132 from .extractor import (
 133     gen_extractor_classes,
 134     get_info_extractor,
 135     _LAZY_LOADER,
 136     _PLUGIN_CLASSES as plugin_extractors
 137 )
 138 from .extractor.openload import PhantomJSwrapper
 139 from .downloader import (
 140     FFmpegFD,
 141     get_suitable_downloader,
 142     shorten_protocol_name
 143 )
 144 from .downloader.rtmp import rtmpdump_version
 145 from .postprocessor import (
 146     get_postprocessor,
 147     EmbedThumbnailPP,
 148     FFmpegFixupDuplicateMoovPP,
 149     FFmpegFixupDurationPP,
 150     FFmpegFixupM3u8PP,
 151     FFmpegFixupM4aPP,
 152     FFmpegFixupStretchedPP,
 153     FFmpegFixupTimestampPP,
 154     FFmpegMergerPP,
 155     FFmpegPostProcessor,
 156     MoveFilesAfterDownloadPP,
 157     _PLUGIN_CLASSES as plugin_postprocessors
 158 )
 159 from .update import detect_variant
 160 from .version import __version__, RELEASE_GIT_HEAD
 161
 162 if compat_os_name == 'nt':
 163     import ctypes
 164
 165
 166 class YoutubeDL(object):
 167     """YoutubeDL class.
 168
 169     YoutubeDL objects are the ones responsible of downloading the
 170     actual video file and writing it to disk if the user has requested
 171     it, among some other tasks. In most cases there should be one per
 172     program. As, given a video URL, the downloader doesn't know how to
 173     extract all the needed information, task that InfoExtractors do, it
 174     has to pass the URL to one of them.
 175
 176     For this, YoutubeDL objects have a method that allows
 177     InfoExtractors to be registered in a given order. When it is passed
 178     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 179     finds that reports being able to handle it. The InfoExtractor extracts
 180     all the information about the video or videos the URL refers to, and
 181     YoutubeDL process the extracted information, possibly using a File
 182     Downloader to download the video.
 183
 184     YoutubeDL objects accept a lot of parameters. In order not to saturate
 185     the object constructor with arguments, it receives a dictionary of
 186     options instead. These options are available through the params
 187     attribute for the InfoExtractors to use. The YoutubeDL also
 188     registers itself as the downloader in charge for the InfoExtractors
 189     that are added to it, so this is a "mutual registration".
 190
 191     Available options:
 192
 193     username:          Username for authentication purposes.
 194     password:          Password for authentication purposes.
 195     videopassword:     Password for accessing a video.
 196     ap_mso:            Adobe Pass multiple-system operator identifier.
 197     ap_username:       Multiple-system operator account username.
 198     ap_password:       Multiple-system operator account password.
 199     usenetrc:          Use netrc for authentication instead.
 200     verbose:           Print additional info to stdout.
 201     quiet:             Do not print messages to stdout.
 202     no_warnings:       Do not print out anything for warnings.
 203     forceprint:        A dict with keys video/playlist mapped to
 204                        a list of templates to force print to stdout
 205                        For compatibility, a single list is also accepted
 206     forceurl:          Force printing final URL. (Deprecated)
 207     forcetitle:        Force printing title. (Deprecated)
 208     forceid:           Force printing ID. (Deprecated)
 209     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 210     forcedescription:  Force printing description. (Deprecated)
 211     forcefilename:     Force printing final filename. (Deprecated)
 212     forceduration:     Force printing duration. (Deprecated)
 213     forcejson:         Force printing info_dict as JSON.
 214     dump_single_json:  Force printing the info_dict of the whole playlist
 215                        (or video) as a single JSON line.
 216     force_write_download_archive: Force writing download archive regardless
 217                        of 'skip_download' or 'simulate'.
 218     simulate:          Do not download the video files. If unset (or None),
 219                        simulate only if listsubtitles, listformats or list_thumbnails is used
 220     format:            Video format code. see "FORMAT SELECTION" for more details.
 221                        You can also pass a function. The function takes 'ctx' as
 222                        argument and returns the formats to download.
 223                        See "build_format_selector" for an implementation
 224     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 225     ignore_no_formats_error: Ignore "No video formats" error. Usefull for
 226                        extracting metadata even if the video is not actually
 227                        available for download (experimental)
 228     format_sort:       A list of fields by which to sort the video formats.
 229                        See "Sorting Formats" for more details.
 230     format_sort_force: Force the given format_sort. see "Sorting Formats"
 231                        for more details.
 232     allow_multiple_video_streams:   Allow multiple video streams to be merged
 233                        into a single file
 234     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 235                        into a single file
 236     check_formats      Whether to test if the formats are downloadable.
 237                        Can be True (check all), False (check none),
 238                        'selected' (check selected formats),
 239                        or None (check only if requested by extractor)
 240     paths:             Dictionary of output paths. The allowed keys are 'home'
 241                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 242     outtmpl:           Dictionary of templates for output names. Allowed keys
 243                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 244                        For compatibility with youtube-dl, a single string can also be used
 245     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 246     restrictfilenames: Do not allow "&" and spaces in file names
 247     trim_file_name:    Limit length of filename (extension excluded)
 248     windowsfilenames:  Force the filenames to be windows compatible
 249     ignoreerrors:      Do not stop on download/postprocessing errors.
 250                        Can be 'only_download' to ignore only download errors.
 251                        Default is 'only_download' for CLI, but False for API
 252     skip_playlist_after_errors: Number of allowed failures until the rest of
 253                        the playlist is skipped
 254     force_generic_extractor: Force downloader to use the generic extractor
 255     overwrites:        Overwrite all video and metadata files if True,
 256                        overwrite only non-video files if None
 257                        and don't overwrite any file if False
 258                        For compatibility with youtube-dl,
 259                        "nooverwrites" may also be used instead
 260     playliststart:     Playlist item to start at.
 261     playlistend:       Playlist item to end at.
 262     playlist_items:    Specific indices of playlist to download.
 263     playlistreverse:   Download playlist items in reverse order.
 264     playlistrandom:    Download playlist items in random order.
 265     matchtitle:        Download only matching titles.
 266     rejecttitle:       Reject downloads for matching titles.
 267     logger:            Log messages to a logging.Logger instance.
 268     logtostderr:       Log messages to stderr instead of stdout.
 269     consoletitle:       Display progress in console window's titlebar.
 270     writedescription:  Write the video description to a .description file
 271     writeinfojson:     Write the video description to a .info.json file
 272     clean_infojson:    Remove private fields from the infojson
 273     getcomments:       Extract video comments. This will not be written to disk
 274                        unless writeinfojson is also given
 275     writeannotations:  Write the video annotations to a .annotations.xml file
 276     writethumbnail:    Write the thumbnail image to a file
 277     allow_playlist_files: Whether to write playlists' description, infojson etc
 278                        also to disk when using the 'write*' options
 279     write_all_thumbnails:  Write all thumbnail formats to files
 280     writelink:         Write an internet shortcut file, depending on the
 281                        current platform (.url/.webloc/.desktop)
 282     writeurllink:      Write a Windows internet shortcut file (.url)
 283     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 284     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 285     writesubtitles:    Write the video subtitles to a file
 286     writeautomaticsub: Write the automatically generated subtitles to a file
 287     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 288                        Downloads all the subtitles of the video
 289                        (requires writesubtitles or writeautomaticsub)
 290     listsubtitles:     Lists all available subtitles for the video
 291     subtitlesformat:   The format code for subtitles
 292     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 293                        The list may contain "all" to refer to all the available
 294                        subtitles. The language can be prefixed with a "-" to
 295                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 296     keepvideo:         Keep the video file after post-processing
 297     daterange:         A DateRange object, download only if the upload_date is in the range.
 298     skip_download:     Skip the actual download of the video file
 299     cachedir:          Location of the cache files in the filesystem.
 300                        False to disable filesystem cache.
 301     noplaylist:        Download single video instead of a playlist if in doubt.
 302     age_limit:         An integer representing the user's age in years.
 303                        Unsuitable videos for the given age are skipped.
 304     min_views:         An integer representing the minimum view count the video
 305                        must have in order to not be skipped.
 306                        Videos without view count information are always
 307                        downloaded. None for no limit.
 308     max_views:         An integer representing the maximum view count.
 309                        Videos that are more popular than that are not
 310                        downloaded.
 311                        Videos without view count information are always
 312                        downloaded. None for no limit.
 313     download_archive:  File name of a file where all downloads are recorded.
 314                        Videos already present in the file are not downloaded
 315                        again.
 316     break_on_existing: Stop the download process after attempting to download a
 317                        file that is in the archive.
 318     break_on_reject:   Stop the download process when encountering a video that
 319                        has been filtered out.
 320     break_per_url:     Whether break_on_reject and break_on_existing
 321                        should act on each input URL as opposed to for the entire queue
 322     cookiefile:        File name where cookies should be read from and dumped to
 323     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 324                        name/pathfrom where cookies are loaded, and the name of the
 325                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 326     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 327                        support RFC 5746 secure renegotiation
 328     nocheckcertificate:  Do not verify SSL certificates
 329     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 330                        At the moment, this is only supported by YouTube.
 331     proxy:             URL of the proxy server to use
 332     geo_verification_proxy:  URL of the proxy to use for IP address verification
 333                        on geo-restricted sites.
 334     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 335     bidi_workaround:   Work around buggy terminals without bidirectional text
 336                        support, using fridibi
 337     debug_printtraffic:Print out sent and received HTTP traffic
 338     include_ads:       Download ads as well (deprecated)
 339     default_search:    Prepend this string if an input url is not valid.
 340                        'auto' for elaborate guessing
 341     encoding:          Use this encoding instead of the system-specified.
 342     extract_flat:      Do not resolve URLs, return the immediate result.
 343                        Pass in 'in_playlist' to only show this behavior for
 344                        playlist items.
 345     wait_for_video:    If given, wait for scheduled streams to become available.
 346                        The value should be a tuple containing the range
 347                        (min_secs, max_secs) to wait between retries
 348     postprocessors:    A list of dictionaries, each with an entry
 349                        * key:  The name of the postprocessor. See
 350                                yt_dlp/postprocessor/__init__.py for a list.
 351                        * when: When to run the postprocessor. Can be one of
 352                                pre_process|before_dl|post_process|after_move.
 353                                Assumed to be 'post_process' if not given
 354     post_hooks:        Deprecated - Register a custom postprocessor instead
 355                        A list of functions that get called as the final step
 356                        for each video file, after all postprocessors have been
 357                        called. The filename will be passed as the only argument.
 358     progress_hooks:    A list of functions that get called on download
 359                        progress, with a dictionary with the entries
 360                        * status: One of "downloading", "error", or "finished".
 361                                  Check this first and ignore unknown values.
 362                        * info_dict: The extracted info_dict
 363
 364                        If status is one of "downloading", or "finished", the
 365                        following properties may also be present:
 366                        * filename: The final filename (always present)
 367                        * tmpfilename: The filename we're currently writing to
 368                        * downloaded_bytes: Bytes on disk
 369                        * total_bytes: Size of the whole file, None if unknown
 370                        * total_bytes_estimate: Guess of the eventual file size,
 371                                                None if unavailable.
 372                        * elapsed: The number of seconds since download started.
 373                        * eta: The estimated time in seconds, None if unknown
 374                        * speed: The download speed in bytes/second, None if
 375                                 unknown
 376                        * fragment_index: The counter of the currently
 377                                          downloaded video fragment.
 378                        * fragment_count: The number of fragments (= individual
 379                                          files that will be merged)
 380
 381                        Progress hooks are guaranteed to be called at least once
 382                        (with status "finished") if the download is successful.
 383     postprocessor_hooks:  A list of functions that get called on postprocessing
 384                        progress, with a dictionary with the entries
 385                        * status: One of "started", "processing", or "finished".
 386                                  Check this first and ignore unknown values.
 387                        * postprocessor: Name of the postprocessor
 388                        * info_dict: The extracted info_dict
 389
 390                        Progress hooks are guaranteed to be called at least twice
 391                        (with status "started" and "finished") if the processing is successful.
 392     merge_output_format: Extension to use when merging formats.
 393     final_ext:         Expected final extension; used to detect when the file was
 394                        already downloaded and converted
 395     fixup:             Automatically correct known faults of the file.
 396                        One of:
 397                        - "never": do nothing
 398                        - "warn": only emit a warning
 399                        - "detect_or_warn": check whether we can do anything
 400                                            about it, warn otherwise (default)
 401     source_address:    Client-side IP address to bind to.
 402     call_home:         Boolean, true iff we are allowed to contact the
 403                        yt-dlp servers for debugging. (BROKEN)
 404     sleep_interval_requests: Number of seconds to sleep between requests
 405                        during extraction
 406     sleep_interval:    Number of seconds to sleep before each download when
 407                        used alone or a lower bound of a range for randomized
 408                        sleep before each download (minimum possible number
 409                        of seconds to sleep) when used along with
 410                        max_sleep_interval.
 411     max_sleep_interval:Upper bound of a range for randomized sleep before each
 412                        download (maximum possible number of seconds to sleep).
 413                        Must only be used along with sleep_interval.
 414                        Actual sleep time will be a random float from range
 415                        [sleep_interval; max_sleep_interval].
 416     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 417     listformats:       Print an overview of available video formats and exit.
 418     list_thumbnails:   Print a table of all thumbnails and exit.
 419     match_filter:      A function that gets called with the info_dict of
 420                        every video.
 421                        If it returns a message, the video is ignored.
 422                        If it returns None, the video is downloaded.
 423                        match_filter_func in utils.py is one example for this.
 424     no_color:          Do not emit color codes in output.
 425     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 426                        HTTP header
 427     geo_bypass_country:
 428                        Two-letter ISO 3166-2 country code that will be used for
 429                        explicit geographic restriction bypassing via faking
 430                        X-Forwarded-For HTTP header
 431     geo_bypass_ip_block:
 432                        IP range in CIDR notation that will be used similarly to
 433                        geo_bypass_country
 434
 435     The following options determine which downloader is picked:
 436     external_downloader: A dictionary of protocol keys and the executable of the
 437                        external downloader to use for it. The allowed protocols
 438                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 439                        Set the value to 'native' to use the native downloader
 440     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 441                        or {'m3u8': 'ffmpeg'} instead.
 442                        Use the native HLS downloader instead of ffmpeg/avconv
 443                        if True, otherwise use ffmpeg/avconv if False, otherwise
 444                        use downloader suggested by extractor if None.
 445     compat_opts:       Compatibility options. See "Differences in default behavior".
 446                        The following options do not work when used through the API:
 447                        filename, abort-on-error, multistreams, no-live-chat, format-sort
 448                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 449                        Refer __init__.py for their implementation
 450     progress_template: Dictionary of templates for progress outputs.
 451                        Allowed keys are 'download', 'postprocess',
 452                        'download-title' (console title) and 'postprocess-title'.
 453                        The template is mapped on a dictionary with keys 'progress' and 'info'
 454
 455     The following parameters are not used by YoutubeDL itself, they are used by
 456     the downloader (see yt_dlp/downloader/common.py):
 457     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 458     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 459     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 460     external_downloader_args, concurrent_fragment_downloads.
 461
 462     The following options are used by the post processors:
 463     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 464                        otherwise prefer ffmpeg. (avconv support is deprecated)
 465     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 466                        to the binary or its containing directory.
 467     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 468                        and a list of additional command-line arguments for the
 469                        postprocessor/executable. The dict can also have "PP+EXE" keys
 470                        which are used when the given exe is used by the given PP.
 471                        Use 'default' as the name for arguments to passed to all PP
 472                        For compatibility with youtube-dl, a single list of args
 473                        can also be used
 474
 475     The following options are used by the extractors:
 476     extractor_retries: Number of times to retry for known errors
 477     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 478     hls_split_discontinuity: Split HLS playlists to different formats at
 479                        discontinuities such as ad breaks (default: False)
 480     extractor_args:    A dictionary of arguments to be passed to the extractors.
 481                        See "EXTRACTOR ARGUMENTS" for details.
 482                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 483     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 484                        If True (default), DASH manifests and related
 485                        data will be downloaded and processed by extractor.
 486                        You can reduce network I/O by disabling it if you don't
 487                        care about DASH. (only for youtube)
 488     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 489                        If True (default), HLS manifests and related
 490                        data will be downloaded and processed by extractor.
 491                        You can reduce network I/O by disabling it if you don't
 492                        care about HLS. (only for youtube)
 493     """
 494
 495     _NUMERIC_FIELDS = set((
 496         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 497         'timestamp', 'release_timestamp',
 498         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 499         'average_rating', 'comment_count', 'age_limit',
 500         'start_time', 'end_time',
 501         'chapter_number', 'season_number', 'episode_number',
 502         'track_number', 'disc_number', 'release_year',
 503     ))
 504
 505     _format_selection_exts = {
 506         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 507         'video': {'mp4', 'flv', 'webm', '3gp'},
 508         'storyboards': {'mhtml'},
 509     }
 510
 511     params = None
 512     _ies = {}
 513     _pps = {k: [] for k in POSTPROCESS_WHEN}
 514     _printed_messages = set()
 515     _first_webpage_request = True
 516     _download_retcode = None
 517     _num_downloads = None
 518     _playlist_level = 0
 519     _playlist_urls = set()
 520     _screen_file = None
 521
 522     def __init__(self, params=None, auto_init=True):
 523         """Create a FileDownloader object with the given options.
 524         @param auto_init    Whether to load the default extractors and print header (if verbose).
 525                             Set to 'no_verbose_header' to not print the header
 526         """
 527         if params is None:
 528             params = {}
 529         self._ies = {}
 530         self._ies_instances = {}
 531         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 532         self._printed_messages = set()
 533         self._first_webpage_request = True
 534         self._post_hooks = []
 535         self._progress_hooks = []
 536         self._postprocessor_hooks = []
 537         self._download_retcode = 0
 538         self._num_downloads = 0
 539         self._num_videos = 0
 540         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 541         self._err_file = sys.stderr
 542         self.params = params
 543         self.cache = Cache(self)
 544
 545         windows_enable_vt_mode()
 546         self._allow_colors = {
 547             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 548             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 549         }
 550
 551         if sys.version_info < (3, 6):
 552             self.report_warning(
 553                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 554
 555         if self.params.get('allow_unplayable_formats'):
 556             self.report_warning(
 557                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 558                 'This is a developer option intended for debugging. \n'
 559                 '         If you experience any issues while using this option, '
 560                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 561
 562         def check_deprecated(param, option, suggestion):
 563             if self.params.get(param) is not None:
 564                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 565                 return True
 566             return False
 567
 568         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 569             if self.params.get('geo_verification_proxy') is None:
 570                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 571
 572         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 573         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 574         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 575
 576         for msg in self.params.get('_warnings', []):
 577             self.report_warning(msg)
 578         for msg in self.params.get('_deprecation_warnings', []):
 579             self.deprecation_warning(msg)
 580
 581         if 'list-formats' in self.params.get('compat_opts', []):
 582             self.params['listformats_table'] = False
 583
 584         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 585             # nooverwrites was unnecessarily changed to overwrites
 586             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 587             # This ensures compatibility with both keys
 588             self.params['overwrites'] = not self.params['nooverwrites']
 589         elif self.params.get('overwrites') is None:
 590             self.params.pop('overwrites', None)
 591         else:
 592             self.params['nooverwrites'] = not self.params['overwrites']
 593
 594         # Compatibility with older syntax
 595         params.setdefault('forceprint', {})
 596         if not isinstance(params['forceprint'], dict):
 597             params['forceprint'] = {'video': params['forceprint']}
 598
 599         if params.get('bidi_workaround', False):
 600             try:
 601                 import pty
 602                 master, slave = pty.openpty()
 603                 width = compat_get_terminal_size().columns
 604                 if width is None:
 605                     width_args = []
 606                 else:
 607                     width_args = ['-w', str(width)]
 608                 sp_kwargs = dict(
 609                     stdin=subprocess.PIPE,
 610                     stdout=slave,
 611                     stderr=self._err_file)
 612                 try:
 613                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 614                 except OSError:
 615                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 616                 self._output_channel = os.fdopen(master, 'rb')
 617             except OSError as ose:
 618                 if ose.errno == errno.ENOENT:
 619                     self.report_warning(
 620                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 621                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 622                 else:
 623                     raise
 624
 625         if (sys.platform != 'win32'
 626                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 627                 and not params.get('restrictfilenames', False)):
 628             # Unicode filesystem API will throw errors (#1474, #13027)
 629             self.report_warning(
 630                 'Assuming --restrict-filenames since file system encoding '
 631                 'cannot encode all characters. '
 632                 'Set the LC_ALL environment variable to fix this.')
 633             self.params['restrictfilenames'] = True
 634
 635         self.outtmpl_dict = self.parse_outtmpl()
 636
 637         # Creating format selector here allows us to catch syntax errors before the extraction
 638         self.format_selector = (
 639             self.params.get('format') if self.params.get('format') in (None, '-')
 640             else self.params['format'] if callable(self.params['format'])
 641             else self.build_format_selector(self.params['format']))
 642
 643         self._setup_opener()
 644
 645         if auto_init:
 646             if auto_init != 'no_verbose_header':
 647                 self.print_debug_header()
 648             self.add_default_info_extractors()
 649
 650         hooks = {
 651             'post_hooks': self.add_post_hook,
 652             'progress_hooks': self.add_progress_hook,
 653             'postprocessor_hooks': self.add_postprocessor_hook,
 654         }
 655         for opt, fn in hooks.items():
 656             for ph in self.params.get(opt, []):
 657                 fn(ph)
 658
 659         for pp_def_raw in self.params.get('postprocessors', []):
 660             pp_def = dict(pp_def_raw)
 661             when = pp_def.pop('when', 'post_process')
 662             self.add_post_processor(
 663                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 664                 when=when)
 665
 666         register_socks_protocols()
 667
 668         def preload_download_archive(fn):
 669             """Preload the archive, if any is specified"""
 670             if fn is None:
 671                 return False
 672             self.write_debug(f'Loading archive file {fn!r}')
 673             try:
 674                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 675                     for line in archive_file:
 676                         self.archive.add(line.strip())
 677             except IOError as ioe:
 678                 if ioe.errno != errno.ENOENT:
 679                     raise
 680                 return False
 681             return True
 682
 683         self.archive = set()
 684         preload_download_archive(self.params.get('download_archive'))
 685
 686     def warn_if_short_id(self, argv):
 687         # short YouTube ID starting with dash?
 688         idxs = [
 689             i for i, a in enumerate(argv)
 690             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 691         if idxs:
 692             correct_argv = (
 693                 ['yt-dlp']
 694                 + [a for i, a in enumerate(argv) if i not in idxs]
 695                 + ['--'] + [argv[i] for i in idxs]
 696             )
 697             self.report_warning(
 698                 'Long argument string detected. '
 699                 'Use -- to separate parameters and URLs, like this:\n%s' %
 700                 args_to_str(correct_argv))
 701
 702     def add_info_extractor(self, ie):
 703         """Add an InfoExtractor object to the end of the list."""
 704         ie_key = ie.ie_key()
 705         self._ies[ie_key] = ie
 706         if not isinstance(ie, type):
 707             self._ies_instances[ie_key] = ie
 708             ie.set_downloader(self)
 709
 710     def _get_info_extractor_class(self, ie_key):
 711         ie = self._ies.get(ie_key)
 712         if ie is None:
 713             ie = get_info_extractor(ie_key)
 714             self.add_info_extractor(ie)
 715         return ie
 716
 717     def get_info_extractor(self, ie_key):
 718         """
 719         Get an instance of an IE with name ie_key, it will try to get one from
 720         the _ies list, if there's no instance it will create a new one and add
 721         it to the extractor list.
 722         """
 723         ie = self._ies_instances.get(ie_key)
 724         if ie is None:
 725             ie = get_info_extractor(ie_key)()
 726             self.add_info_extractor(ie)
 727         return ie
 728
 729     def add_default_info_extractors(self):
 730         """
 731         Add the InfoExtractors returned by gen_extractors to the end of the list
 732         """
 733         for ie in gen_extractor_classes():
 734             self.add_info_extractor(ie)
 735
 736     def add_post_processor(self, pp, when='post_process'):
 737         """Add a PostProcessor object to the end of the chain."""
 738         self._pps[when].append(pp)
 739         pp.set_downloader(self)
 740
 741     def add_post_hook(self, ph):
 742         """Add the post hook"""
 743         self._post_hooks.append(ph)
 744
 745     def add_progress_hook(self, ph):
 746         """Add the download progress hook"""
 747         self._progress_hooks.append(ph)
 748
 749     def add_postprocessor_hook(self, ph):
 750         """Add the postprocessing progress hook"""
 751         self._postprocessor_hooks.append(ph)
 752         for pps in self._pps.values():
 753             for pp in pps:
 754                 pp.add_progress_hook(ph)
 755
 756     def _bidi_workaround(self, message):
 757         if not hasattr(self, '_output_channel'):
 758             return message
 759
 760         assert hasattr(self, '_output_process')
 761         assert isinstance(message, compat_str)
 762         line_count = message.count('\n') + 1
 763         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 764         self._output_process.stdin.flush()
 765         res = ''.join(self._output_channel.readline().decode('utf-8')
 766                       for _ in range(line_count))
 767         return res[:-len('\n')]
 768
 769     def _write_string(self, message, out=None, only_once=False):
 770         if only_once:
 771             if message in self._printed_messages:
 772                 return
 773             self._printed_messages.add(message)
 774         write_string(message, out=out, encoding=self.params.get('encoding'))
 775
 776     def to_stdout(self, message, skip_eol=False, quiet=False):
 777         """Print message to stdout"""
 778         if self.params.get('logger'):
 779             self.params['logger'].debug(message)
 780         elif not quiet or self.params.get('verbose'):
 781             self._write_string(
 782                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 783                 self._err_file if quiet else self._screen_file)
 784
 785     def to_stderr(self, message, only_once=False):
 786         """Print message to stderr"""
 787         assert isinstance(message, compat_str)
 788         if self.params.get('logger'):
 789             self.params['logger'].error(message)
 790         else:
 791             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 792
 793     def to_console_title(self, message):
 794         if not self.params.get('consoletitle', False):
 795             return
 796         message = remove_terminal_sequences(message)
 797         if compat_os_name == 'nt':
 798             if ctypes.windll.kernel32.GetConsoleWindow():
 799                 # c_wchar_p() might not be necessary if `message` is
 800                 # already of type unicode()
 801                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 802         elif 'TERM' in os.environ:
 803             self._write_string('\033]0;%s\007' % message, self._screen_file)
 804
 805     def save_console_title(self):
 806         if not self.params.get('consoletitle', False):
 807             return
 808         if self.params.get('simulate'):
 809             return
 810         if compat_os_name != 'nt' and 'TERM' in os.environ:
 811             # Save the title on stack
 812             self._write_string('\033[22;0t', self._screen_file)
 813
 814     def restore_console_title(self):
 815         if not self.params.get('consoletitle', False):
 816             return
 817         if self.params.get('simulate'):
 818             return
 819         if compat_os_name != 'nt' and 'TERM' in os.environ:
 820             # Restore the title from stack
 821             self._write_string('\033[23;0t', self._screen_file)
 822
 823     def __enter__(self):
 824         self.save_console_title()
 825         return self
 826
 827     def __exit__(self, *args):
 828         self.restore_console_title()
 829
 830         if self.params.get('cookiefile') is not None:
 831             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 832
 833     def trouble(self, message=None, tb=None, is_error=True):
 834         """Determine action to take when a download problem appears.
 835
 836         Depending on if the downloader has been configured to ignore
 837         download errors or not, this method may throw an exception or
 838         not when errors are found, after printing the message.
 839
 840         @param tb          If given, is additional traceback information
 841         @param is_error    Whether to raise error according to ignorerrors
 842         """
 843         if message is not None:
 844             self.to_stderr(message)
 845         if self.params.get('verbose'):
 846             if tb is None:
 847                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 848                     tb = ''
 849                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 850                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 851                     tb += encode_compat_str(traceback.format_exc())
 852                 else:
 853                     tb_data = traceback.format_list(traceback.extract_stack())
 854                     tb = ''.join(tb_data)
 855             if tb:
 856                 self.to_stderr(tb)
 857         if not is_error:
 858             return
 859         if not self.params.get('ignoreerrors'):
 860             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 861                 exc_info = sys.exc_info()[1].exc_info
 862             else:
 863                 exc_info = sys.exc_info()
 864             raise DownloadError(message, exc_info)
 865         self._download_retcode = 1
 866
 867     def to_screen(self, message, skip_eol=False):
 868         """Print message to stdout if not in quiet mode"""
 869         self.to_stdout(
 870             message, skip_eol, quiet=self.params.get('quiet', False))
 871
 872     class Styles(Enum):
 873         HEADERS = 'yellow'
 874         EMPHASIS = 'light blue'
 875         ID = 'green'
 876         DELIM = 'blue'
 877         ERROR = 'red'
 878         WARNING = 'yellow'
 879         SUPPRESS = 'light black'
 880
 881     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 882         if test_encoding:
 883             original_text = text
 884             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 885             text = text.encode(encoding, 'ignore').decode(encoding)
 886             if fallback is not None and text != original_text:
 887                 text = fallback
 888         if isinstance(f, self.Styles):
 889             f = f.value
 890         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 891
 892     def _format_screen(self, *args, **kwargs):
 893         return self._format_text(
 894             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 895
 896     def _format_err(self, *args, **kwargs):
 897         return self._format_text(
 898             self._err_file, self._allow_colors['err'], *args, **kwargs)
 899
 900     def report_warning(self, message, only_once=False):
 901         '''
 902         Print the message to stderr, it will be prefixed with 'WARNING:'
 903         If stderr is a tty file the 'WARNING:' will be colored
 904         '''
 905         if self.params.get('logger') is not None:
 906             self.params['logger'].warning(message)
 907         else:
 908             if self.params.get('no_warnings'):
 909                 return
 910             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 911
 912     def deprecation_warning(self, message):
 913         if self.params.get('logger') is not None:
 914             self.params['logger'].warning('DeprecationWarning: {message}')
 915         else:
 916             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 917
 918     def report_error(self, message, *args, **kwargs):
 919         '''
 920         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 921         in red if stderr is a tty file.
 922         '''
 923         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 924
 925     def write_debug(self, message, only_once=False):
 926         '''Log debug message or Print message to stderr'''
 927         if not self.params.get('verbose', False):
 928             return
 929         message = '[debug] %s' % message
 930         if self.params.get('logger'):
 931             self.params['logger'].debug(message)
 932         else:
 933             self.to_stderr(message, only_once)
 934
 935     def report_file_already_downloaded(self, file_name):
 936         """Report file has already been fully downloaded."""
 937         try:
 938             self.to_screen('[download] %s has already been downloaded' % file_name)
 939         except UnicodeEncodeError:
 940             self.to_screen('[download] The file has already been downloaded')
 941
 942     def report_file_delete(self, file_name):
 943         """Report that existing file will be deleted."""
 944         try:
 945             self.to_screen('Deleting existing file %s' % file_name)
 946         except UnicodeEncodeError:
 947             self.to_screen('Deleting existing file')
 948
 949     def raise_no_formats(self, info, forced=False):
 950         has_drm = info.get('__has_drm')
 951         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 952         expected = self.params.get('ignore_no_formats_error')
 953         if forced or not expected:
 954             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 955                                  expected=has_drm or expected)
 956         else:
 957             self.report_warning(msg)
 958
 959     def parse_outtmpl(self):
 960         outtmpl_dict = self.params.get('outtmpl', {})
 961         if not isinstance(outtmpl_dict, dict):
 962             outtmpl_dict = {'default': outtmpl_dict}
 963         # Remove spaces in the default template
 964         if self.params.get('restrictfilenames'):
 965             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 966         else:
 967             sanitize = lambda x: x
 968         outtmpl_dict.update({
 969             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 970             if outtmpl_dict.get(k) is None})
 971         for key, val in outtmpl_dict.items():
 972             if isinstance(val, bytes):
 973                 self.report_warning(
 974                     'Parameter outtmpl is bytes, but should be a unicode string. '
 975                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 976         return outtmpl_dict
 977
 978     def get_output_path(self, dir_type='', filename=None):
 979         paths = self.params.get('paths', {})
 980         assert isinstance(paths, dict)
 981         path = os.path.join(
 982             expand_path(paths.get('home', '').strip()),
 983             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 984             filename or '')
 985
 986         # Temporary fix for #4787
 987         # 'Treat' all problem characters by passing filename through preferredencoding
 988         # to workaround encoding issues with subprocess on python2 @ Windows
 989         if sys.version_info < (3, 0) and sys.platform == 'win32':
 990             path = encodeFilename(path, True).decode(preferredencoding())
 991         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 992
 993     @staticmethod
 994     def _outtmpl_expandpath(outtmpl):
 995         # expand_path translates '%%' into '%' and '$$' into '$'
 996         # correspondingly that is not what we want since we need to keep
 997         # '%%' intact for template dict substitution step. Working around
 998         # with boundary-alike separator hack.
 999         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1000         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
1001
1002         # outtmpl should be expand_path'ed before template dict substitution
1003         # because meta fields may contain env variables we don't want to
1004         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1005         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1006         return expand_path(outtmpl).replace(sep, '')
1007
1008     @staticmethod
1009     def escape_outtmpl(outtmpl):
1010         ''' Escape any remaining strings like %s, %abc% etc. '''
1011         return re.sub(
1012             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1013             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1014             outtmpl)
1015
1016     @classmethod
1017     def validate_outtmpl(cls, outtmpl):
1018         ''' @return None or Exception object '''
1019         outtmpl = re.sub(
1020             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1021             lambda mobj: f'{mobj.group(0)[:-1]}s',
1022             cls._outtmpl_expandpath(outtmpl))
1023         try:
1024             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1025             return None
1026         except ValueError as err:
1027             return err
1028
1029     @staticmethod
1030     def _copy_infodict(info_dict):
1031         info_dict = dict(info_dict)
1032         for key in ('__original_infodict', '__postprocessors'):
1033             info_dict.pop(key, None)
1034         return info_dict
1035
1036     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1037         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1038         @param sanitize    Whether to sanitize the output as a filename.
1039                            For backward compatibility, a function can also be passed
1040         """
1041
1042         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1043
1044         info_dict = self._copy_infodict(info_dict)
1045         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1046             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1047             if info_dict.get('duration', None) is not None
1048             else None)
1049         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
1050         info_dict['video_autonumber'] = self._num_videos
1051         if info_dict.get('resolution') is None:
1052             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1053
1054         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1055         # of %(field)s to %(field)0Nd for backward compatibility
1056         field_size_compat_map = {
1057             'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1058             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1059             'autonumber': self.params.get('autonumber_size') or 5,
1060         }
1061
1062         TMPL_DICT = {}
1063         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
1064         MATH_FUNCTIONS = {
1065             '+': float.__add__,
1066             '-': float.__sub__,
1067         }
1068         # Field is of the form key1.key2...
1069         # where keys (except first) can be string, int or slice
1070         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
1071         MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
1072         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1073         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1074             (?P<negate>-)?
1075             (?P<fields>{field})
1076             (?P<maths>(?:{math_op}{math_field})*)
1077             (?:>(?P<strf_format>.+?))?
1078             (?P<alternate>(?<!\\),[^|&)]+)?
1079             (?:&(?P<replacement>.*?))?
1080             (?:\|(?P<default>.*?))?
1081             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
1082
1083         def _traverse_infodict(k):
1084             k = k.split('.')
1085             if k[0] == '':
1086                 k.pop(0)
1087             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
1088
1089         def get_value(mdict):
1090             # Object traversal
1091             value = _traverse_infodict(mdict['fields'])
1092             # Negative
1093             if mdict['negate']:
1094                 value = float_or_none(value)
1095                 if value is not None:
1096                     value *= -1
1097             # Do maths
1098             offset_key = mdict['maths']
1099             if offset_key:
1100                 value = float_or_none(value)
1101                 operator = None
1102                 while offset_key:
1103                     item = re.match(
1104                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1105                         offset_key).group(0)
1106                     offset_key = offset_key[len(item):]
1107                     if operator is None:
1108                         operator = MATH_FUNCTIONS[item]
1109                         continue
1110                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1111                     offset = float_or_none(item)
1112                     if offset is None:
1113                         offset = float_or_none(_traverse_infodict(item))
1114                     try:
1115                         value = operator(value, multiplier * offset)
1116                     except (TypeError, ZeroDivisionError):
1117                         return None
1118                     operator = None
1119             # Datetime formatting
1120             if mdict['strf_format']:
1121                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1122
1123             return value
1124
1125         na = self.params.get('outtmpl_na_placeholder', 'NA')
1126
1127         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1128             return sanitize_filename(str(value), restricted=restricted,
1129                                      is_id=re.search(r'(^|[_.])id(\.|$)', key))
1130
1131         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1132         sanitize = bool(sanitize)
1133
1134         def _dumpjson_default(obj):
1135             if isinstance(obj, (set, LazyList)):
1136                 return list(obj)
1137             return repr(obj)
1138
1139         def create_key(outer_mobj):
1140             if not outer_mobj.group('has_key'):
1141                 return outer_mobj.group(0)
1142             key = outer_mobj.group('key')
1143             mobj = re.match(INTERNAL_FORMAT_RE, key)
1144             initial_field = mobj.group('fields') if mobj else ''
1145             value, replacement, default = None, None, na
1146             while mobj:
1147                 mobj = mobj.groupdict()
1148                 default = mobj['default'] if mobj['default'] is not None else default
1149                 value = get_value(mobj)
1150                 replacement = mobj['replacement']
1151                 if value is None and mobj['alternate']:
1152                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1153                 else:
1154                     break
1155
1156             fmt = outer_mobj.group('format')
1157             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1158                 fmt = '0{:d}d'.format(field_size_compat_map[key])
1159
1160             value = default if value is None else value if replacement is None else replacement
1161
1162             flags = outer_mobj.group('conversion') or ''
1163             str_fmt = f'{fmt[:-1]}s'
1164             if fmt[-1] == 'l':  # list
1165                 delim = '\n' if '#' in flags else ', '
1166                 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1167             elif fmt[-1] == 'j':  # json
1168                 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
1169             elif fmt[-1] == 'q':  # quoted
1170                 value = map(str, variadic(value) if '#' in flags else [value])
1171                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1172             elif fmt[-1] == 'B':  # bytes
1173                 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1174                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1175             elif fmt[-1] == 'U':  # unicode normalized
1176                 value, fmt = unicodedata.normalize(
1177                     # "+" = compatibility equivalence, "#" = NFD
1178                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1179                     value), str_fmt
1180             elif fmt[-1] == 'D':  # decimal suffix
1181                 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1182                 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1183                                               factor=1024 if '#' in flags else 1000)
1184             elif fmt[-1] == 'S':  # filename sanitization
1185                 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
1186             elif fmt[-1] == 'c':
1187                 if value:
1188                     value = str(value)[0]
1189                 else:
1190                     fmt = str_fmt
1191             elif fmt[-1] not in 'rs':  # numeric
1192                 value = float_or_none(value)
1193                 if value is None:
1194                     value, fmt = default, 's'
1195
1196             if sanitize:
1197                 if fmt[-1] == 'r':
1198                     # If value is an object, sanitize might convert it to a string
1199                     # So we convert it to repr first
1200                     value, fmt = repr(value), str_fmt
1201                 if fmt[-1] in 'csr':
1202                     value = sanitizer(initial_field, value)
1203
1204             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1205             TMPL_DICT[key] = value
1206             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1207
1208         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1209
1210     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1211         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1212         return self.escape_outtmpl(outtmpl) % info_dict
1213
1214     def _prepare_filename(self, info_dict, tmpl_type='default'):
1215         try:
1216             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1217             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1218
1219             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1220             if filename and force_ext is not None:
1221                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1222
1223             # https://github.com/blackjack4494/youtube-dlc/issues/85
1224             trim_file_name = self.params.get('trim_file_name', False)
1225             if trim_file_name:
1226                 no_ext, *ext = filename.rsplit('.', 2)
1227                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1228
1229             return filename
1230         except ValueError as err:
1231             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1232             return None
1233
1234     def prepare_filename(self, info_dict, dir_type='', warn=False):
1235         """Generate the output filename."""
1236
1237         filename = self._prepare_filename(info_dict, dir_type or 'default')
1238         if not filename and dir_type not in ('', 'temp'):
1239             return ''
1240
1241         if warn:
1242             if not self.params.get('paths'):
1243                 pass
1244             elif filename == '-':
1245                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1246             elif os.path.isabs(filename):
1247                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1248         if filename == '-' or not filename:
1249             return filename
1250
1251         return self.get_output_path(dir_type, filename)
1252
1253     def _match_entry(self, info_dict, incomplete=False, silent=False):
1254         """ Returns None if the file should be downloaded """
1255
1256         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1257
1258         def check_filter():
1259             if 'title' in info_dict:
1260                 # This can happen when we're just evaluating the playlist
1261                 title = info_dict['title']
1262                 matchtitle = self.params.get('matchtitle', False)
1263                 if matchtitle:
1264                     if not re.search(matchtitle, title, re.IGNORECASE):
1265                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1266                 rejecttitle = self.params.get('rejecttitle', False)
1267                 if rejecttitle:
1268                     if re.search(rejecttitle, title, re.IGNORECASE):
1269                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1270             date = info_dict.get('upload_date')
1271             if date is not None:
1272                 dateRange = self.params.get('daterange', DateRange())
1273                 if date not in dateRange:
1274                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1275             view_count = info_dict.get('view_count')
1276             if view_count is not None:
1277                 min_views = self.params.get('min_views')
1278                 if min_views is not None and view_count < min_views:
1279                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1280                 max_views = self.params.get('max_views')
1281                 if max_views is not None and view_count > max_views:
1282                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1283             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1284                 return 'Skipping "%s" because it is age restricted' % video_title
1285
1286             match_filter = self.params.get('match_filter')
1287             if match_filter is not None:
1288                 try:
1289                     ret = match_filter(info_dict, incomplete=incomplete)
1290                 except TypeError:
1291                     # For backward compatibility
1292                     ret = None if incomplete else match_filter(info_dict)
1293                 if ret is not None:
1294                     return ret
1295             return None
1296
1297         if self.in_download_archive(info_dict):
1298             reason = '%s has already been recorded in the archive' % video_title
1299             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1300         else:
1301             reason = check_filter()
1302             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1303         if reason is not None:
1304             if not silent:
1305                 self.to_screen('[download] ' + reason)
1306             if self.params.get(break_opt, False):
1307                 raise break_err()
1308         return reason
1309
1310     @staticmethod
1311     def add_extra_info(info_dict, extra_info):
1312         '''Set the keys from extra_info in info dict if they are missing'''
1313         for key, value in extra_info.items():
1314             info_dict.setdefault(key, value)
1315
1316     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1317                      process=True, force_generic_extractor=False):
1318         """
1319         Return a list with a dictionary for each video extracted.
1320
1321         Arguments:
1322         url -- URL to extract
1323
1324         Keyword arguments:
1325         download -- whether to download videos during extraction
1326         ie_key -- extractor key hint
1327         extra_info -- dictionary containing the extra values to add to each result
1328         process -- whether to resolve all unresolved references (URLs, playlist items),
1329             must be True for download to work.
1330         force_generic_extractor -- force using the generic extractor
1331         """
1332
1333         if extra_info is None:
1334             extra_info = {}
1335
1336         if not ie_key and force_generic_extractor:
1337             ie_key = 'Generic'
1338
1339         if ie_key:
1340             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1341         else:
1342             ies = self._ies
1343
1344         for ie_key, ie in ies.items():
1345             if not ie.suitable(url):
1346                 continue
1347
1348             if not ie.working():
1349                 self.report_warning('The program functionality for this site has been marked as broken, '
1350                                     'and will probably not work.')
1351
1352             temp_id = ie.get_temp_id(url)
1353             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1354                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1355                 if self.params.get('break_on_existing', False):
1356                     raise ExistingVideoReached()
1357                 break
1358             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1359         else:
1360             self.report_error('no suitable InfoExtractor for URL %s' % url)
1361
1362     def __handle_extraction_exceptions(func):
1363         @functools.wraps(func)
1364         def wrapper(self, *args, **kwargs):
1365             while True:
1366                 try:
1367                     return func(self, *args, **kwargs)
1368                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1369                     raise
1370                 except ReExtractInfo as e:
1371                     if e.expected:
1372                         self.to_screen(f'{e}; Re-extracting data')
1373                     else:
1374                         self.to_stderr('\r')
1375                         self.report_warning(f'{e}; Re-extracting data')
1376                     continue
1377                 except GeoRestrictedError as e:
1378                     msg = e.msg
1379                     if e.countries:
1380                         msg += '\nThis video is available in %s.' % ', '.join(
1381                             map(ISO3166Utils.short2full, e.countries))
1382                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1383                     self.report_error(msg)
1384                 except ExtractorError as e:  # An error we somewhat expected
1385                     self.report_error(str(e), e.format_traceback())
1386                 except Exception as e:
1387                     if self.params.get('ignoreerrors'):
1388                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1389                     else:
1390                         raise
1391                 break
1392         return wrapper
1393
1394     def _wait_for_video(self, ie_result):
1395         if (not self.params.get('wait_for_video')
1396                 or ie_result.get('_type', 'video') != 'video'
1397                 or ie_result.get('formats') or ie_result.get('url')):
1398             return
1399
1400         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1401         last_msg = ''
1402
1403         def progress(msg):
1404             nonlocal last_msg
1405             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1406             last_msg = msg
1407
1408         min_wait, max_wait = self.params.get('wait_for_video')
1409         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1410         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1411             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1412             self.report_warning('Release time of video is not known')
1413         elif (diff or 0) <= 0:
1414             self.report_warning('Video should already be available according to extracted info')
1415         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1416         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1417
1418         wait_till = time.time() + diff
1419         try:
1420             while True:
1421                 diff = wait_till - time.time()
1422                 if diff <= 0:
1423                     progress('')
1424                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1425                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1426                 time.sleep(1)
1427         except KeyboardInterrupt:
1428             progress('')
1429             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1430         except BaseException as e:
1431             if not isinstance(e, ReExtractInfo):
1432                 self.to_screen('')
1433             raise
1434
1435     @__handle_extraction_exceptions
1436     def __extract_info(self, url, ie, download, extra_info, process):
1437         ie_result = ie.extract(url)
1438         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1439             return
1440         if isinstance(ie_result, list):
1441             # Backwards compatibility: old IE result format
1442             ie_result = {
1443                 '_type': 'compat_list',
1444                 'entries': ie_result,
1445             }
1446         if extra_info.get('original_url'):
1447             ie_result.setdefault('original_url', extra_info['original_url'])
1448         self.add_default_extra_info(ie_result, ie, url)
1449         if process:
1450             self._wait_for_video(ie_result)
1451             return self.process_ie_result(ie_result, download, extra_info)
1452         else:
1453             return ie_result
1454
1455     def add_default_extra_info(self, ie_result, ie, url):
1456         if url is not None:
1457             self.add_extra_info(ie_result, {
1458                 'webpage_url': url,
1459                 'original_url': url,
1460                 'webpage_url_basename': url_basename(url),
1461                 'webpage_url_domain': get_domain(url),
1462             })
1463         if ie is not None:
1464             self.add_extra_info(ie_result, {
1465                 'extractor': ie.IE_NAME,
1466                 'extractor_key': ie.ie_key(),
1467             })
1468
1469     def process_ie_result(self, ie_result, download=True, extra_info=None):
1470         """
1471         Take the result of the ie(may be modified) and resolve all unresolved
1472         references (URLs, playlist items).
1473
1474         It will also download the videos if 'download'.
1475         Returns the resolved ie_result.
1476         """
1477         if extra_info is None:
1478             extra_info = {}
1479         result_type = ie_result.get('_type', 'video')
1480
1481         if result_type in ('url', 'url_transparent'):
1482             ie_result['url'] = sanitize_url(ie_result['url'])
1483             if ie_result.get('original_url'):
1484                 extra_info.setdefault('original_url', ie_result['original_url'])
1485
1486             extract_flat = self.params.get('extract_flat', False)
1487             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1488                     or extract_flat is True):
1489                 info_copy = ie_result.copy()
1490                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1491                 if ie and not ie_result.get('id'):
1492                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1493                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1494                 self.add_extra_info(info_copy, extra_info)
1495                 info_copy, _ = self.pre_process(info_copy)
1496                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1497                 if self.params.get('force_write_download_archive', False):
1498                     self.record_download_archive(info_copy)
1499                 return ie_result
1500
1501         if result_type == 'video':
1502             self.add_extra_info(ie_result, extra_info)
1503             ie_result = self.process_video_result(ie_result, download=download)
1504             additional_urls = (ie_result or {}).get('additional_urls')
1505             if additional_urls:
1506                 # TODO: Improve MetadataParserPP to allow setting a list
1507                 if isinstance(additional_urls, compat_str):
1508                     additional_urls = [additional_urls]
1509                 self.to_screen(
1510                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1511                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1512                 ie_result['additional_entries'] = [
1513                     self.extract_info(
1514                         url, download, extra_info=extra_info,
1515                         force_generic_extractor=self.params.get('force_generic_extractor'))
1516                     for url in additional_urls
1517                 ]
1518             return ie_result
1519         elif result_type == 'url':
1520             # We have to add extra_info to the results because it may be
1521             # contained in a playlist
1522             return self.extract_info(
1523                 ie_result['url'], download,
1524                 ie_key=ie_result.get('ie_key'),
1525                 extra_info=extra_info)
1526         elif result_type == 'url_transparent':
1527             # Use the information from the embedding page
1528             info = self.extract_info(
1529                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1530                 extra_info=extra_info, download=False, process=False)
1531
1532             # extract_info may return None when ignoreerrors is enabled and
1533             # extraction failed with an error, don't crash and return early
1534             # in this case
1535             if not info:
1536                 return info
1537
1538             force_properties = dict(
1539                 (k, v) for k, v in ie_result.items() if v is not None)
1540             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1541                 if f in force_properties:
1542                     del force_properties[f]
1543             new_result = info.copy()
1544             new_result.update(force_properties)
1545
1546             # Extracted info may not be a video result (i.e.
1547             # info.get('_type', 'video') != video) but rather an url or
1548             # url_transparent. In such cases outer metadata (from ie_result)
1549             # should be propagated to inner one (info). For this to happen
1550             # _type of info should be overridden with url_transparent. This
1551             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1552             if new_result.get('_type') == 'url':
1553                 new_result['_type'] = 'url_transparent'
1554
1555             return self.process_ie_result(
1556                 new_result, download=download, extra_info=extra_info)
1557         elif result_type in ('playlist', 'multi_video'):
1558             # Protect from infinite recursion due to recursively nested playlists
1559             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1560             webpage_url = ie_result['webpage_url']
1561             if webpage_url in self._playlist_urls:
1562                 self.to_screen(
1563                     '[download] Skipping already downloaded playlist: %s'
1564                     % ie_result.get('title') or ie_result.get('id'))
1565                 return
1566
1567             self._playlist_level += 1
1568             self._playlist_urls.add(webpage_url)
1569             self._sanitize_thumbnails(ie_result)
1570             try:
1571                 return self.__process_playlist(ie_result, download)
1572             finally:
1573                 self._playlist_level -= 1
1574                 if not self._playlist_level:
1575                     self._playlist_urls.clear()
1576         elif result_type == 'compat_list':
1577             self.report_warning(
1578                 'Extractor %s returned a compat_list result. '
1579                 'It needs to be updated.' % ie_result.get('extractor'))
1580
1581             def _fixup(r):
1582                 self.add_extra_info(r, {
1583                     'extractor': ie_result['extractor'],
1584                     'webpage_url': ie_result['webpage_url'],
1585                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1586                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1587                     'extractor_key': ie_result['extractor_key'],
1588                 })
1589                 return r
1590             ie_result['entries'] = [
1591                 self.process_ie_result(_fixup(r), download, extra_info)
1592                 for r in ie_result['entries']
1593             ]
1594             return ie_result
1595         else:
1596             raise Exception('Invalid result type: %s' % result_type)
1597
1598     def _ensure_dir_exists(self, path):
1599         return make_dir(path, self.report_error)
1600
1601     @staticmethod
1602     def _playlist_infodict(ie_result, **kwargs):
1603         return {
1604             **ie_result,
1605             'playlist': ie_result.get('title') or ie_result.get('id'),
1606             'playlist_id': ie_result.get('id'),
1607             'playlist_title': ie_result.get('title'),
1608             'playlist_uploader': ie_result.get('uploader'),
1609             'playlist_uploader_id': ie_result.get('uploader_id'),
1610             'playlist_index': 0,
1611             **kwargs,
1612         }
1613
1614     def __process_playlist(self, ie_result, download):
1615         # We process each entry in the playlist
1616         playlist = ie_result.get('title') or ie_result.get('id')
1617         self.to_screen('[download] Downloading playlist: %s' % playlist)
1618
1619         if 'entries' not in ie_result:
1620             raise EntryNotInPlaylist('There are no entries')
1621
1622         MissingEntry = object()
1623         incomplete_entries = bool(ie_result.get('requested_entries'))
1624         if incomplete_entries:
1625             def fill_missing_entries(entries, indices):
1626                 ret = [MissingEntry] * max(indices)
1627                 for i, entry in zip(indices, entries):
1628                     ret[i - 1] = entry
1629                 return ret
1630             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1631
1632         playlist_results = []
1633
1634         playliststart = self.params.get('playliststart', 1)
1635         playlistend = self.params.get('playlistend')
1636         # For backwards compatibility, interpret -1 as whole list
1637         if playlistend == -1:
1638             playlistend = None
1639
1640         playlistitems_str = self.params.get('playlist_items')
1641         playlistitems = None
1642         if playlistitems_str is not None:
1643             def iter_playlistitems(format):
1644                 for string_segment in format.split(','):
1645                     if '-' in string_segment:
1646                         start, end = string_segment.split('-')
1647                         for item in range(int(start), int(end) + 1):
1648                             yield int(item)
1649                     else:
1650                         yield int(string_segment)
1651             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1652
1653         ie_entries = ie_result['entries']
1654         if isinstance(ie_entries, list):
1655             playlist_count = len(ie_entries)
1656             msg = f'Collected {playlist_count} videos; downloading %d of them'
1657             ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
1658
1659             def get_entry(i):
1660                 return ie_entries[i - 1]
1661         else:
1662             msg = 'Downloading %d videos'
1663             if not isinstance(ie_entries, (PagedList, LazyList)):
1664                 ie_entries = LazyList(ie_entries)
1665
1666             def get_entry(i):
1667                 return YoutubeDL.__handle_extraction_exceptions(
1668                     lambda self, i: ie_entries[i - 1]
1669                 )(self, i)
1670
1671         entries, broken = [], False
1672         items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1673         for i in items:
1674             if i == 0:
1675                 continue
1676             if playlistitems is None and playlistend is not None and playlistend < i:
1677                 break
1678             entry = None
1679             try:
1680                 entry = get_entry(i)
1681                 if entry is MissingEntry:
1682                     raise EntryNotInPlaylist()
1683             except (IndexError, EntryNotInPlaylist):
1684                 if incomplete_entries:
1685                     raise EntryNotInPlaylist(f'Entry {i} cannot be found')
1686                 elif not playlistitems:
1687                     break
1688             entries.append(entry)
1689             try:
1690                 if entry is not None:
1691                     self._match_entry(entry, incomplete=True, silent=True)
1692             except (ExistingVideoReached, RejectedVideoReached):
1693                 broken = True
1694                 break
1695         ie_result['entries'] = entries
1696
1697         # Save playlist_index before re-ordering
1698         entries = [
1699             ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1700             for i, entry in enumerate(entries, 1)
1701             if entry is not None]
1702         n_entries = len(entries)
1703
1704         if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
1705             ie_result['playlist_count'] = n_entries
1706
1707         if not playlistitems and (playliststart != 1 or playlistend):
1708             playlistitems = list(range(playliststart, playliststart + n_entries))
1709         ie_result['requested_entries'] = playlistitems
1710
1711         _infojson_written = False
1712         write_playlist_files = self.params.get('allow_playlist_files', True)
1713         if write_playlist_files and self.params.get('list_thumbnails'):
1714             self.list_thumbnails(ie_result)
1715         if write_playlist_files and not self.params.get('simulate'):
1716             ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
1717             _infojson_written = self._write_info_json(
1718                 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1719             if _infojson_written is None:
1720                 return
1721             if self._write_description('playlist', ie_result,
1722                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1723                 return
1724             # TODO: This should be passed to ThumbnailsConvertor if necessary
1725             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1726
1727         if self.params.get('playlistreverse', False):
1728             entries = entries[::-1]
1729         if self.params.get('playlistrandom', False):
1730             random.shuffle(entries)
1731
1732         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1733
1734         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1735         failures = 0
1736         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1737         for i, entry_tuple in enumerate(entries, 1):
1738             playlist_index, entry = entry_tuple
1739             if 'playlist-index' in self.params.get('compat_opts', []):
1740                 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1741             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1742             # This __x_forwarded_for_ip thing is a bit ugly but requires
1743             # minimal changes
1744             if x_forwarded_for:
1745                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1746             extra = {
1747                 'n_entries': n_entries,
1748                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1749                 'playlist_count': ie_result.get('playlist_count'),
1750                 'playlist_index': playlist_index,
1751                 'playlist_autonumber': i,
1752                 'playlist': playlist,
1753                 'playlist_id': ie_result.get('id'),
1754                 'playlist_title': ie_result.get('title'),
1755                 'playlist_uploader': ie_result.get('uploader'),
1756                 'playlist_uploader_id': ie_result.get('uploader_id'),
1757                 'extractor': ie_result['extractor'],
1758                 'webpage_url': ie_result['webpage_url'],
1759                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1760                 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1761                 'extractor_key': ie_result['extractor_key'],
1762             }
1763
1764             if self._match_entry(entry, incomplete=True) is not None:
1765                 continue
1766
1767             entry_result = self.__process_iterable_entry(entry, download, extra)
1768             if not entry_result:
1769                 failures += 1
1770             if failures >= max_failures:
1771                 self.report_error(
1772                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1773                 break
1774             playlist_results.append(entry_result)
1775         ie_result['entries'] = playlist_results
1776
1777         # Write the updated info to json
1778         if _infojson_written and self._write_info_json(
1779                 'updated playlist', ie_result,
1780                 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1781             return
1782
1783         ie_result = self.run_all_pps('playlist', ie_result)
1784         self.to_screen(f'[download] Finished downloading playlist: {playlist}')
1785         return ie_result
1786
1787     @__handle_extraction_exceptions
1788     def __process_iterable_entry(self, entry, download, extra_info):
1789         return self.process_ie_result(
1790             entry, download=download, extra_info=extra_info)
1791
1792     def _build_format_filter(self, filter_spec):
1793         " Returns a function to filter the formats according to the filter_spec "
1794
1795         OPERATORS = {
1796             '<': operator.lt,
1797             '<=': operator.le,
1798             '>': operator.gt,
1799             '>=': operator.ge,
1800             '=': operator.eq,
1801             '!=': operator.ne,
1802         }
1803         operator_rex = re.compile(r'''(?x)\s*
1804             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1805             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1806             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1807             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1808         m = operator_rex.fullmatch(filter_spec)
1809         if m:
1810             try:
1811                 comparison_value = int(m.group('value'))
1812             except ValueError:
1813                 comparison_value = parse_filesize(m.group('value'))
1814                 if comparison_value is None:
1815                     comparison_value = parse_filesize(m.group('value') + 'B')
1816                 if comparison_value is None:
1817                     raise ValueError(
1818                         'Invalid value %r in format specification %r' % (
1819                             m.group('value'), filter_spec))
1820             op = OPERATORS[m.group('op')]
1821
1822         if not m:
1823             STR_OPERATORS = {
1824                 '=': operator.eq,
1825                 '^=': lambda attr, value: attr.startswith(value),
1826                 '$=': lambda attr, value: attr.endswith(value),
1827                 '*=': lambda attr, value: value in attr,
1828             }
1829             str_operator_rex = re.compile(r'''(?x)\s*
1830                 (?P<key>[a-zA-Z0-9._-]+)\s*
1831                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1832                 (?P<value>[a-zA-Z0-9._-]+)\s*
1833                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1834             m = str_operator_rex.fullmatch(filter_spec)
1835             if m:
1836                 comparison_value = m.group('value')
1837                 str_op = STR_OPERATORS[m.group('op')]
1838                 if m.group('negation'):
1839                     op = lambda attr, value: not str_op(attr, value)
1840                 else:
1841                     op = str_op
1842
1843         if not m:
1844             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1845
1846         def _filter(f):
1847             actual_value = f.get(m.group('key'))
1848             if actual_value is None:
1849                 return m.group('none_inclusive')
1850             return op(actual_value, comparison_value)
1851         return _filter
1852
1853     def _check_formats(self, formats):
1854         for f in formats:
1855             self.to_screen('[info] Testing format %s' % f['format_id'])
1856             path = self.get_output_path('temp')
1857             if not self._ensure_dir_exists(f'{path}/'):
1858                 continue
1859             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1860             temp_file.close()
1861             try:
1862                 success, _ = self.dl(temp_file.name, f, test=True)
1863             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1864                 success = False
1865             finally:
1866                 if os.path.exists(temp_file.name):
1867                     try:
1868                         os.remove(temp_file.name)
1869                     except OSError:
1870                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1871             if success:
1872                 yield f
1873             else:
1874                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1875
1876     def _default_format_spec(self, info_dict, download=True):
1877
1878         def can_merge():
1879             merger = FFmpegMergerPP(self)
1880             return merger.available and merger.can_merge()
1881
1882         prefer_best = (
1883             not self.params.get('simulate')
1884             and download
1885             and (
1886                 not can_merge()
1887                 or info_dict.get('is_live', False)
1888                 or self.outtmpl_dict['default'] == '-'))
1889         compat = (
1890             prefer_best
1891             or self.params.get('allow_multiple_audio_streams', False)
1892             or 'format-spec' in self.params.get('compat_opts', []))
1893
1894         return (
1895             'best/bestvideo+bestaudio' if prefer_best
1896             else 'bestvideo*+bestaudio/best' if not compat
1897             else 'bestvideo+bestaudio/best')
1898
1899     def build_format_selector(self, format_spec):
1900         def syntax_error(note, start):
1901             message = (
1902                 'Invalid format specification: '
1903                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1904             return SyntaxError(message)
1905
1906         PICKFIRST = 'PICKFIRST'
1907         MERGE = 'MERGE'
1908         SINGLE = 'SINGLE'
1909         GROUP = 'GROUP'
1910         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1911
1912         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1913                                   'video': self.params.get('allow_multiple_video_streams', False)}
1914
1915         check_formats = self.params.get('check_formats') == 'selected'
1916
1917         def _parse_filter(tokens):
1918             filter_parts = []
1919             for type, string, start, _, _ in tokens:
1920                 if type == tokenize.OP and string == ']':
1921                     return ''.join(filter_parts)
1922                 else:
1923                     filter_parts.append(string)
1924
1925         def _remove_unused_ops(tokens):
1926             # Remove operators that we don't use and join them with the surrounding strings
1927             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1928             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1929             last_string, last_start, last_end, last_line = None, None, None, None
1930             for type, string, start, end, line in tokens:
1931                 if type == tokenize.OP and string == '[':
1932                     if last_string:
1933                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1934                         last_string = None
1935                     yield type, string, start, end, line
1936                     # everything inside brackets will be handled by _parse_filter
1937                     for type, string, start, end, line in tokens:
1938                         yield type, string, start, end, line
1939                         if type == tokenize.OP and string == ']':
1940                             break
1941                 elif type == tokenize.OP and string in ALLOWED_OPS:
1942                     if last_string:
1943                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1944                         last_string = None
1945                     yield type, string, start, end, line
1946                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1947                     if not last_string:
1948                         last_string = string
1949                         last_start = start
1950                         last_end = end
1951                     else:
1952                         last_string += string
1953             if last_string:
1954                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1955
1956         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1957             selectors = []
1958             current_selector = None
1959             for type, string, start, _, _ in tokens:
1960                 # ENCODING is only defined in python 3.x
1961                 if type == getattr(tokenize, 'ENCODING', None):
1962                     continue
1963                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1964                     current_selector = FormatSelector(SINGLE, string, [])
1965                 elif type == tokenize.OP:
1966                     if string == ')':
1967                         if not inside_group:
1968                             # ')' will be handled by the parentheses group
1969                             tokens.restore_last_token()
1970                         break
1971                     elif inside_merge and string in ['/', ',']:
1972                         tokens.restore_last_token()
1973                         break
1974                     elif inside_choice and string == ',':
1975                         tokens.restore_last_token()
1976                         break
1977                     elif string == ',':
1978                         if not current_selector:
1979                             raise syntax_error('"," must follow a format selector', start)
1980                         selectors.append(current_selector)
1981                         current_selector = None
1982                     elif string == '/':
1983                         if not current_selector:
1984                             raise syntax_error('"/" must follow a format selector', start)
1985                         first_choice = current_selector
1986                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1987                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1988                     elif string == '[':
1989                         if not current_selector:
1990                             current_selector = FormatSelector(SINGLE, 'best', [])
1991                         format_filter = _parse_filter(tokens)
1992                         current_selector.filters.append(format_filter)
1993                     elif string == '(':
1994                         if current_selector:
1995                             raise syntax_error('Unexpected "("', start)
1996                         group = _parse_format_selection(tokens, inside_group=True)
1997                         current_selector = FormatSelector(GROUP, group, [])
1998                     elif string == '+':
1999                         if not current_selector:
2000                             raise syntax_error('Unexpected "+"', start)
2001                         selector_1 = current_selector
2002                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2003                         if not selector_2:
2004                             raise syntax_error('Expected a selector', start)
2005                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2006                     else:
2007                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2008                 elif type == tokenize.ENDMARKER:
2009                     break
2010             if current_selector:
2011                 selectors.append(current_selector)
2012             return selectors
2013
2014         def _merge(formats_pair):
2015             format_1, format_2 = formats_pair
2016
2017             formats_info = []
2018             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2019             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2020
2021             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2022                 get_no_more = {'video': False, 'audio': False}
2023                 for (i, fmt_info) in enumerate(formats_info):
2024                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2025                         formats_info.pop(i)
2026                         continue
2027                     for aud_vid in ['audio', 'video']:
2028                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2029                             if get_no_more[aud_vid]:
2030                                 formats_info.pop(i)
2031                                 break
2032                             get_no_more[aud_vid] = True
2033
2034             if len(formats_info) == 1:
2035                 return formats_info[0]
2036
2037             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2038             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2039
2040             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2041             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2042
2043             output_ext = self.params.get('merge_output_format')
2044             if not output_ext:
2045                 if the_only_video:
2046                     output_ext = the_only_video['ext']
2047                 elif the_only_audio and not video_fmts:
2048                     output_ext = the_only_audio['ext']
2049                 else:
2050                     output_ext = 'mkv'
2051
2052             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2053
2054             new_dict = {
2055                 'requested_formats': formats_info,
2056                 'format': '+'.join(filtered('format')),
2057                 'format_id': '+'.join(filtered('format_id')),
2058                 'ext': output_ext,
2059                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2060                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2061                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2062                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2063                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2064             }
2065
2066             if the_only_video:
2067                 new_dict.update({
2068                     'width': the_only_video.get('width'),
2069                     'height': the_only_video.get('height'),
2070                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2071                     'fps': the_only_video.get('fps'),
2072                     'dynamic_range': the_only_video.get('dynamic_range'),
2073                     'vcodec': the_only_video.get('vcodec'),
2074                     'vbr': the_only_video.get('vbr'),
2075                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2076                 })
2077
2078             if the_only_audio:
2079                 new_dict.update({
2080                     'acodec': the_only_audio.get('acodec'),
2081                     'abr': the_only_audio.get('abr'),
2082                     'asr': the_only_audio.get('asr'),
2083                 })
2084
2085             return new_dict
2086
2087         def _check_formats(formats):
2088             if not check_formats:
2089                 yield from formats
2090                 return
2091             yield from self._check_formats(formats)
2092
2093         def _build_selector_function(selector):
2094             if isinstance(selector, list):  # ,
2095                 fs = [_build_selector_function(s) for s in selector]
2096
2097                 def selector_function(ctx):
2098                     for f in fs:
2099                         yield from f(ctx)
2100                 return selector_function
2101
2102             elif selector.type == GROUP:  # ()
2103                 selector_function = _build_selector_function(selector.selector)
2104
2105             elif selector.type == PICKFIRST:  # /
2106                 fs = [_build_selector_function(s) for s in selector.selector]
2107
2108                 def selector_function(ctx):
2109                     for f in fs:
2110                         picked_formats = list(f(ctx))
2111                         if picked_formats:
2112                             return picked_formats
2113                     return []
2114
2115             elif selector.type == MERGE:  # +
2116                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2117
2118                 def selector_function(ctx):
2119                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2120                         yield _merge(pair)
2121
2122             elif selector.type == SINGLE:  # atom
2123                 format_spec = selector.selector or 'best'
2124
2125                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2126                 if format_spec == 'all':
2127                     def selector_function(ctx):
2128                         yield from _check_formats(ctx['formats'][::-1])
2129                 elif format_spec == 'mergeall':
2130                     def selector_function(ctx):
2131                         formats = list(_check_formats(ctx['formats']))
2132                         if not formats:
2133                             return
2134                         merged_format = formats[-1]
2135                         for f in formats[-2::-1]:
2136                             merged_format = _merge((merged_format, f))
2137                         yield merged_format
2138
2139                 else:
2140                     format_fallback, format_reverse, format_idx = False, True, 1
2141                     mobj = re.match(
2142                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2143                         format_spec)
2144                     if mobj is not None:
2145                         format_idx = int_or_none(mobj.group('n'), default=1)
2146                         format_reverse = mobj.group('bw')[0] == 'b'
2147                         format_type = (mobj.group('type') or [None])[0]
2148                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2149                         format_modified = mobj.group('mod') is not None
2150
2151                         format_fallback = not format_type and not format_modified  # for b, w
2152                         _filter_f = (
2153                             (lambda f: f.get('%scodec' % format_type) != 'none')
2154                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2155                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2156                             if format_type  # bv, ba, wv, wa
2157                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2158                             if not format_modified  # b, w
2159                             else lambda f: True)  # b*, w*
2160                         filter_f = lambda f: _filter_f(f) and (
2161                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2162                     else:
2163                         if format_spec in self._format_selection_exts['audio']:
2164                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2165                         elif format_spec in self._format_selection_exts['video']:
2166                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2167                         elif format_spec in self._format_selection_exts['storyboards']:
2168                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2169                         else:
2170                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2171
2172                     def selector_function(ctx):
2173                         formats = list(ctx['formats'])
2174                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2175                         if format_fallback and ctx['incomplete_formats'] and not matches:
2176                             # for extractors with incomplete formats (audio only (soundcloud)
2177                             # or video only (imgur)) best/worst will fallback to
2178                             # best/worst {video,audio}-only format
2179                             matches = formats
2180                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2181                         try:
2182                             yield matches[format_idx - 1]
2183                         except IndexError:
2184                             return
2185
2186             filters = [self._build_format_filter(f) for f in selector.filters]
2187
2188             def final_selector(ctx):
2189                 ctx_copy = dict(ctx)
2190                 for _filter in filters:
2191                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2192                 return selector_function(ctx_copy)
2193             return final_selector
2194
2195         stream = io.BytesIO(format_spec.encode('utf-8'))
2196         try:
2197             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2198         except tokenize.TokenError:
2199             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2200
2201         class TokenIterator(object):
2202             def __init__(self, tokens):
2203                 self.tokens = tokens
2204                 self.counter = 0
2205
2206             def __iter__(self):
2207                 return self
2208
2209             def __next__(self):
2210                 if self.counter >= len(self.tokens):
2211                     raise StopIteration()
2212                 value = self.tokens[self.counter]
2213                 self.counter += 1
2214                 return value
2215
2216             next = __next__
2217
2218             def restore_last_token(self):
2219                 self.counter -= 1
2220
2221         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2222         return _build_selector_function(parsed_selector)
2223
2224     def _calc_headers(self, info_dict):
2225         res = std_headers.copy()
2226
2227         add_headers = info_dict.get('http_headers')
2228         if add_headers:
2229             res.update(add_headers)
2230
2231         cookies = self._calc_cookies(info_dict)
2232         if cookies:
2233             res['Cookie'] = cookies
2234
2235         if 'X-Forwarded-For' not in res:
2236             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2237             if x_forwarded_for_ip:
2238                 res['X-Forwarded-For'] = x_forwarded_for_ip
2239
2240         return res
2241
2242     def _calc_cookies(self, info_dict):
2243         pr = sanitized_Request(info_dict['url'])
2244         self.cookiejar.add_cookie_header(pr)
2245         return pr.get_header('Cookie')
2246
2247     def _sort_thumbnails(self, thumbnails):
2248         thumbnails.sort(key=lambda t: (
2249             t.get('preference') if t.get('preference') is not None else -1,
2250             t.get('width') if t.get('width') is not None else -1,
2251             t.get('height') if t.get('height') is not None else -1,
2252             t.get('id') if t.get('id') is not None else '',
2253             t.get('url')))
2254
2255     def _sanitize_thumbnails(self, info_dict):
2256         thumbnails = info_dict.get('thumbnails')
2257         if thumbnails is None:
2258             thumbnail = info_dict.get('thumbnail')
2259             if thumbnail:
2260                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2261         if not thumbnails:
2262             return
2263
2264         def check_thumbnails(thumbnails):
2265             for t in thumbnails:
2266                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2267                 try:
2268                     self.urlopen(HEADRequest(t['url']))
2269                 except network_exceptions as err:
2270                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2271                     continue
2272                 yield t
2273
2274         self._sort_thumbnails(thumbnails)
2275         for i, t in enumerate(thumbnails):
2276             if t.get('id') is None:
2277                 t['id'] = '%d' % i
2278             if t.get('width') and t.get('height'):
2279                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2280             t['url'] = sanitize_url(t['url'])
2281
2282         if self.params.get('check_formats') is True:
2283             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2284         else:
2285             info_dict['thumbnails'] = thumbnails
2286
2287     def process_video_result(self, info_dict, download=True):
2288         assert info_dict.get('_type', 'video') == 'video'
2289         self._num_videos += 1
2290
2291         if 'id' not in info_dict:
2292             raise ExtractorError('Missing "id" field in extractor result')
2293         if 'title' not in info_dict:
2294             raise ExtractorError('Missing "title" field in extractor result',
2295                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2296
2297         def report_force_conversion(field, field_not, conversion):
2298             self.report_warning(
2299                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2300                 % (field, field_not, conversion))
2301
2302         def sanitize_string_field(info, string_field):
2303             field = info.get(string_field)
2304             if field is None or isinstance(field, compat_str):
2305                 return
2306             report_force_conversion(string_field, 'a string', 'string')
2307             info[string_field] = compat_str(field)
2308
2309         def sanitize_numeric_fields(info):
2310             for numeric_field in self._NUMERIC_FIELDS:
2311                 field = info.get(numeric_field)
2312                 if field is None or isinstance(field, compat_numeric_types):
2313                     continue
2314                 report_force_conversion(numeric_field, 'numeric', 'int')
2315                 info[numeric_field] = int_or_none(field)
2316
2317         sanitize_string_field(info_dict, 'id')
2318         sanitize_numeric_fields(info_dict)
2319
2320         if 'playlist' not in info_dict:
2321             # It isn't part of a playlist
2322             info_dict['playlist'] = None
2323             info_dict['playlist_index'] = None
2324
2325         self._sanitize_thumbnails(info_dict)
2326
2327         thumbnail = info_dict.get('thumbnail')
2328         thumbnails = info_dict.get('thumbnails')
2329         if thumbnail:
2330             info_dict['thumbnail'] = sanitize_url(thumbnail)
2331         elif thumbnails:
2332             info_dict['thumbnail'] = thumbnails[-1]['url']
2333
2334         if info_dict.get('display_id') is None and 'id' in info_dict:
2335             info_dict['display_id'] = info_dict['id']
2336
2337         if info_dict.get('duration') is not None:
2338             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2339
2340         for ts_key, date_key in (
2341                 ('timestamp', 'upload_date'),
2342                 ('release_timestamp', 'release_date'),
2343                 ('modified_timestamp', 'modified_date'),
2344         ):
2345             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2346                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2347                 # see http://bugs.python.org/issue1646728)
2348                 try:
2349                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2350                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2351                 except (ValueError, OverflowError, OSError):
2352                     pass
2353
2354         live_keys = ('is_live', 'was_live')
2355         live_status = info_dict.get('live_status')
2356         if live_status is None:
2357             for key in live_keys:
2358                 if info_dict.get(key) is False:
2359                     continue
2360                 if info_dict.get(key):
2361                     live_status = key
2362                 break
2363             if all(info_dict.get(key) is False for key in live_keys):
2364                 live_status = 'not_live'
2365         if live_status:
2366             info_dict['live_status'] = live_status
2367             for key in live_keys:
2368                 if info_dict.get(key) is None:
2369                     info_dict[key] = (live_status == key)
2370
2371         # Auto generate title fields corresponding to the *_number fields when missing
2372         # in order to always have clean titles. This is very common for TV series.
2373         for field in ('chapter', 'season', 'episode'):
2374             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2375                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2376
2377         for cc_kind in ('subtitles', 'automatic_captions'):
2378             cc = info_dict.get(cc_kind)
2379             if cc:
2380                 for _, subtitle in cc.items():
2381                     for subtitle_format in subtitle:
2382                         if subtitle_format.get('url'):
2383                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2384                         if subtitle_format.get('ext') is None:
2385                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2386
2387         automatic_captions = info_dict.get('automatic_captions')
2388         subtitles = info_dict.get('subtitles')
2389
2390         info_dict['requested_subtitles'] = self.process_subtitles(
2391             info_dict['id'], subtitles, automatic_captions)
2392
2393         if info_dict.get('formats') is None:
2394             # There's only one format available
2395             formats = [info_dict]
2396         else:
2397             formats = info_dict['formats']
2398
2399         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2400         if not self.params.get('allow_unplayable_formats'):
2401             formats = [f for f in formats if not f.get('has_drm')]
2402
2403         # backward compatibility
2404         info_dict['fulltitle'] = info_dict['title']
2405
2406         if info_dict.get('is_live'):
2407             get_from_start = bool(self.params.get('live_from_start'))
2408             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2409             if not get_from_start:
2410                 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2411
2412         if not formats:
2413             self.raise_no_formats(info_dict)
2414
2415         def is_wellformed(f):
2416             url = f.get('url')
2417             if not url:
2418                 self.report_warning(
2419                     '"url" field is missing or empty - skipping format, '
2420                     'there is an error in extractor')
2421                 return False
2422             if isinstance(url, bytes):
2423                 sanitize_string_field(f, 'url')
2424             return True
2425
2426         # Filter out malformed formats for better extraction robustness
2427         formats = list(filter(is_wellformed, formats))
2428
2429         formats_dict = {}
2430
2431         # We check that all the formats have the format and format_id fields
2432         for i, format in enumerate(formats):
2433             sanitize_string_field(format, 'format_id')
2434             sanitize_numeric_fields(format)
2435             format['url'] = sanitize_url(format['url'])
2436             if not format.get('format_id'):
2437                 format['format_id'] = compat_str(i)
2438             else:
2439                 # Sanitize format_id from characters used in format selector expression
2440                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2441             format_id = format['format_id']
2442             if format_id not in formats_dict:
2443                 formats_dict[format_id] = []
2444             formats_dict[format_id].append(format)
2445
2446         # Make sure all formats have unique format_id
2447         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2448         for format_id, ambiguous_formats in formats_dict.items():
2449             ambigious_id = len(ambiguous_formats) > 1
2450             for i, format in enumerate(ambiguous_formats):
2451                 if ambigious_id:
2452                     format['format_id'] = '%s-%d' % (format_id, i)
2453                 if format.get('ext') is None:
2454                     format['ext'] = determine_ext(format['url']).lower()
2455                 # Ensure there is no conflict between id and ext in format selection
2456                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2457                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2458                     format['format_id'] = 'f%s' % format['format_id']
2459
2460         for i, format in enumerate(formats):
2461             if format.get('format') is None:
2462                 format['format'] = '{id} - {res}{note}'.format(
2463                     id=format['format_id'],
2464                     res=self.format_resolution(format),
2465                     note=format_field(format, 'format_note', ' (%s)'),
2466                 )
2467             if format.get('protocol') is None:
2468                 format['protocol'] = determine_protocol(format)
2469             if format.get('resolution') is None:
2470                 format['resolution'] = self.format_resolution(format, default=None)
2471             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2472                 format['dynamic_range'] = 'SDR'
2473             if (info_dict.get('duration') and format.get('tbr')
2474                     and not format.get('filesize') and not format.get('filesize_approx')):
2475                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2476
2477             # Add HTTP headers, so that external programs can use them from the
2478             # json output
2479             full_format_info = info_dict.copy()
2480             full_format_info.update(format)
2481             format['http_headers'] = self._calc_headers(full_format_info)
2482         # Remove private housekeeping stuff
2483         if '__x_forwarded_for_ip' in info_dict:
2484             del info_dict['__x_forwarded_for_ip']
2485
2486         # TODO Central sorting goes here
2487
2488         if self.params.get('check_formats') is True:
2489             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2490
2491         if not formats or formats[0] is not info_dict:
2492             # only set the 'formats' fields if the original info_dict list them
2493             # otherwise we end up with a circular reference, the first (and unique)
2494             # element in the 'formats' field in info_dict is info_dict itself,
2495             # which can't be exported to json
2496             info_dict['formats'] = formats
2497
2498         info_dict, _ = self.pre_process(info_dict)
2499
2500         # The pre-processors may have modified the formats
2501         formats = info_dict.get('formats', [info_dict])
2502
2503         list_only = self.params.get('simulate') is None and (
2504             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2505         interactive_format_selection = not list_only and self.format_selector == '-'
2506         if self.params.get('list_thumbnails'):
2507             self.list_thumbnails(info_dict)
2508         if self.params.get('listsubtitles'):
2509             if 'automatic_captions' in info_dict:
2510                 self.list_subtitles(
2511                     info_dict['id'], automatic_captions, 'automatic captions')
2512             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2513         if self.params.get('listformats') or interactive_format_selection:
2514             self.list_formats(info_dict)
2515         if list_only:
2516             # Without this printing, -F --print-json will not work
2517             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2518             return
2519
2520         format_selector = self.format_selector
2521         if format_selector is None:
2522             req_format = self._default_format_spec(info_dict, download=download)
2523             self.write_debug('Default format spec: %s' % req_format)
2524             format_selector = self.build_format_selector(req_format)
2525
2526         while True:
2527             if interactive_format_selection:
2528                 req_format = input(
2529                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2530                 try:
2531                     format_selector = self.build_format_selector(req_format)
2532                 except SyntaxError as err:
2533                     self.report_error(err, tb=False, is_error=False)
2534                     continue
2535
2536             # While in format selection we may need to have an access to the original
2537             # format set in order to calculate some metrics or do some processing.
2538             # For now we need to be able to guess whether original formats provided
2539             # by extractor are incomplete or not (i.e. whether extractor provides only
2540             # video-only or audio-only formats) for proper formats selection for
2541             # extractors with such incomplete formats (see
2542             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2543             # Since formats may be filtered during format selection and may not match
2544             # the original formats the results may be incorrect. Thus original formats
2545             # or pre-calculated metrics should be passed to format selection routines
2546             # as well.
2547             # We will pass a context object containing all necessary additional data
2548             # instead of just formats.
2549             # This fixes incorrect format selection issue (see
2550             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2551             incomplete_formats = (
2552                 # All formats are video-only or
2553                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2554                 # all formats are audio-only
2555                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2556
2557             ctx = {
2558                 'formats': formats,
2559                 'incomplete_formats': incomplete_formats,
2560             }
2561
2562             formats_to_download = list(format_selector(ctx))
2563             if interactive_format_selection and not formats_to_download:
2564                 self.report_error('Requested format is not available', tb=False, is_error=False)
2565                 continue
2566             break
2567
2568         if not formats_to_download:
2569             if not self.params.get('ignore_no_formats_error'):
2570                 raise ExtractorError('Requested format is not available', expected=True,
2571                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2572             self.report_warning('Requested format is not available')
2573             # Process what we can, even without any available formats.
2574             formats_to_download = [{}]
2575
2576         best_format = formats_to_download[-1]
2577         if download:
2578             if best_format:
2579                 self.to_screen(
2580                     f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2581                     + ', '.join([f['format_id'] for f in formats_to_download]))
2582             max_downloads_reached = False
2583             for i, fmt in enumerate(formats_to_download):
2584                 formats_to_download[i] = new_info = dict(info_dict)
2585                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2586                 new_info.update(fmt)
2587                 new_info['__original_infodict'] = info_dict
2588                 try:
2589                     self.process_info(new_info)
2590                 except MaxDownloadsReached:
2591                     max_downloads_reached = True
2592                 new_info.pop('__original_infodict')
2593                 # Remove copied info
2594                 for key, val in tuple(new_info.items()):
2595                     if info_dict.get(key) == val:
2596                         new_info.pop(key)
2597                 if max_downloads_reached:
2598                     break
2599
2600             write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2601             assert write_archive.issubset({True, False, 'ignore'})
2602             if True in write_archive and False not in write_archive:
2603                 self.record_download_archive(info_dict)
2604
2605             info_dict['requested_downloads'] = formats_to_download
2606             info_dict = self.run_all_pps('after_video', info_dict)
2607             if max_downloads_reached:
2608                 raise MaxDownloadsReached()
2609
2610         # We update the info dict with the selected best quality format (backwards compatibility)
2611         info_dict.update(best_format)
2612         return info_dict
2613
2614     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2615         """Select the requested subtitles and their format"""
2616         available_subs = {}
2617         if normal_subtitles and self.params.get('writesubtitles'):
2618             available_subs.update(normal_subtitles)
2619         if automatic_captions and self.params.get('writeautomaticsub'):
2620             for lang, cap_info in automatic_captions.items():
2621                 if lang not in available_subs:
2622                     available_subs[lang] = cap_info
2623
2624         if (not self.params.get('writesubtitles') and not
2625                 self.params.get('writeautomaticsub') or not
2626                 available_subs):
2627             return None
2628
2629         all_sub_langs = available_subs.keys()
2630         if self.params.get('allsubtitles', False):
2631             requested_langs = all_sub_langs
2632         elif self.params.get('subtitleslangs', False):
2633             # A list is used so that the order of languages will be the same as
2634             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2635             requested_langs = []
2636             for lang_re in self.params.get('subtitleslangs'):
2637                 if lang_re == 'all':
2638                     requested_langs.extend(all_sub_langs)
2639                     continue
2640                 discard = lang_re[0] == '-'
2641                 if discard:
2642                     lang_re = lang_re[1:]
2643                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2644                 if discard:
2645                     for lang in current_langs:
2646                         while lang in requested_langs:
2647                             requested_langs.remove(lang)
2648                 else:
2649                     requested_langs.extend(current_langs)
2650             requested_langs = orderedSet(requested_langs)
2651         elif 'en' in available_subs:
2652             requested_langs = ['en']
2653         else:
2654             requested_langs = [list(all_sub_langs)[0]]
2655         if requested_langs:
2656             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2657
2658         formats_query = self.params.get('subtitlesformat', 'best')
2659         formats_preference = formats_query.split('/') if formats_query else []
2660         subs = {}
2661         for lang in requested_langs:
2662             formats = available_subs.get(lang)
2663             if formats is None:
2664                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2665                 continue
2666             for ext in formats_preference:
2667                 if ext == 'best':
2668                     f = formats[-1]
2669                     break
2670                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2671                 if matches:
2672                     f = matches[-1]
2673                     break
2674             else:
2675                 f = formats[-1]
2676                 self.report_warning(
2677                     'No subtitle format found matching "%s" for language %s, '
2678                     'using %s' % (formats_query, lang, f['ext']))
2679             subs[lang] = f
2680         return subs
2681
2682     def _forceprint(self, tmpl, info_dict):
2683         mobj = re.match(r'\w+(=?)$', tmpl)
2684         if mobj and mobj.group(1):
2685             tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2686         elif mobj:
2687             tmpl = '%({})s'.format(tmpl)
2688
2689         info_dict = info_dict.copy()
2690         info_dict['formats_table'] = self.render_formats_table(info_dict)
2691         info_dict['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2692         info_dict['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2693         info_dict['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2694         self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2695
2696     def __forced_printings(self, info_dict, filename, incomplete):
2697         def print_mandatory(field, actual_field=None):
2698             if actual_field is None:
2699                 actual_field = field
2700             if (self.params.get('force%s' % field, False)
2701                     and (not incomplete or info_dict.get(actual_field) is not None)):
2702                 self.to_stdout(info_dict[actual_field])
2703
2704         def print_optional(field):
2705             if (self.params.get('force%s' % field, False)
2706                     and info_dict.get(field) is not None):
2707                 self.to_stdout(info_dict[field])
2708
2709         info_dict = info_dict.copy()
2710         if filename is not None:
2711             info_dict['filename'] = filename
2712         if info_dict.get('requested_formats') is not None:
2713             # For RTMP URLs, also include the playpath
2714             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2715         elif 'url' in info_dict:
2716             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2717
2718         if self.params['forceprint'].get('video') or self.params.get('forcejson'):
2719             self.post_extract(info_dict)
2720         for tmpl in self.params['forceprint'].get('video', []):
2721             self._forceprint(tmpl, info_dict)
2722
2723         print_mandatory('title')
2724         print_mandatory('id')
2725         print_mandatory('url', 'urls')
2726         print_optional('thumbnail')
2727         print_optional('description')
2728         print_optional('filename')
2729         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2730             self.to_stdout(formatSeconds(info_dict['duration']))
2731         print_mandatory('format')
2732
2733         if self.params.get('forcejson'):
2734             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2735
2736     def dl(self, name, info, subtitle=False, test=False):
2737         if not info.get('url'):
2738             self.raise_no_formats(info, True)
2739
2740         if test:
2741             verbose = self.params.get('verbose')
2742             params = {
2743                 'test': True,
2744                 'quiet': self.params.get('quiet') or not verbose,
2745                 'verbose': verbose,
2746                 'noprogress': not verbose,
2747                 'nopart': True,
2748                 'skip_unavailable_fragments': False,
2749                 'keep_fragments': False,
2750                 'overwrites': True,
2751                 '_no_ytdl_file': True,
2752             }
2753         else:
2754             params = self.params
2755         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2756         if not test:
2757             for ph in self._progress_hooks:
2758                 fd.add_progress_hook(ph)
2759             urls = '", "'.join(
2760                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2761                 for f in info.get('requested_formats', []) or [info])
2762             self.write_debug('Invoking downloader on "%s"' % urls)
2763
2764         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2765         # But it may contain objects that are not deep-copyable
2766         new_info = self._copy_infodict(info)
2767         if new_info.get('http_headers') is None:
2768             new_info['http_headers'] = self._calc_headers(new_info)
2769         return fd.download(name, new_info, subtitle)
2770
2771     def existing_file(self, filepaths, *, default_overwrite=True):
2772         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2773         if existing_files and not self.params.get('overwrites', default_overwrite):
2774             return existing_files[0]
2775
2776         for file in existing_files:
2777             self.report_file_delete(file)
2778             os.remove(file)
2779         return None
2780
2781     def process_info(self, info_dict):
2782         """Process a single resolved IE result. (Modified it in-place)"""
2783
2784         assert info_dict.get('_type', 'video') == 'video'
2785         original_infodict = info_dict
2786
2787         if 'format' not in info_dict and 'ext' in info_dict:
2788             info_dict['format'] = info_dict['ext']
2789
2790         if self._match_entry(info_dict) is not None:
2791             info_dict['__write_download_archive'] = 'ignore'
2792             return
2793
2794         self.post_extract(info_dict)
2795         self._num_downloads += 1
2796
2797         # info_dict['_filename'] needs to be set for backward compatibility
2798         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2799         temp_filename = self.prepare_filename(info_dict, 'temp')
2800         files_to_move = {}
2801
2802         # Forced printings
2803         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2804
2805         if self.params.get('simulate'):
2806             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2807             return
2808
2809         if full_filename is None:
2810             return
2811         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2812             return
2813         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2814             return
2815
2816         if self._write_description('video', info_dict,
2817                                    self.prepare_filename(info_dict, 'description')) is None:
2818             return
2819
2820         sub_files = self._write_subtitles(info_dict, temp_filename)
2821         if sub_files is None:
2822             return
2823         files_to_move.update(dict(sub_files))
2824
2825         thumb_files = self._write_thumbnails(
2826             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2827         if thumb_files is None:
2828             return
2829         files_to_move.update(dict(thumb_files))
2830
2831         infofn = self.prepare_filename(info_dict, 'infojson')
2832         _infojson_written = self._write_info_json('video', info_dict, infofn)
2833         if _infojson_written:
2834             info_dict['infojson_filename'] = infofn
2835             # For backward compatibility, even though it was a private field
2836             info_dict['__infojson_filename'] = infofn
2837         elif _infojson_written is None:
2838             return
2839
2840         # Note: Annotations are deprecated
2841         annofn = None
2842         if self.params.get('writeannotations', False):
2843             annofn = self.prepare_filename(info_dict, 'annotation')
2844         if annofn:
2845             if not self._ensure_dir_exists(encodeFilename(annofn)):
2846                 return
2847             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2848                 self.to_screen('[info] Video annotations are already present')
2849             elif not info_dict.get('annotations'):
2850                 self.report_warning('There are no annotations to write.')
2851             else:
2852                 try:
2853                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2854                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2855                         annofile.write(info_dict['annotations'])
2856                 except (KeyError, TypeError):
2857                     self.report_warning('There are no annotations to write.')
2858                 except (OSError, IOError):
2859                     self.report_error('Cannot write annotations file: ' + annofn)
2860                     return
2861
2862         # Write internet shortcut files
2863         def _write_link_file(link_type):
2864             if 'webpage_url' not in info_dict:
2865                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2866                 return False
2867             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2868             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2869                 return False
2870             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2871                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2872                 return True
2873             try:
2874                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2875                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2876                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2877                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2878                     if link_type == 'desktop':
2879                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2880                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2881             except (OSError, IOError):
2882                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2883                 return False
2884             return True
2885
2886         write_links = {
2887             'url': self.params.get('writeurllink'),
2888             'webloc': self.params.get('writewebloclink'),
2889             'desktop': self.params.get('writedesktoplink'),
2890         }
2891         if self.params.get('writelink'):
2892             link_type = ('webloc' if sys.platform == 'darwin'
2893                          else 'desktop' if sys.platform.startswith('linux')
2894                          else 'url')
2895             write_links[link_type] = True
2896
2897         if any(should_write and not _write_link_file(link_type)
2898                for link_type, should_write in write_links.items()):
2899             return
2900
2901         def replace_info_dict(new_info):
2902             nonlocal info_dict
2903             if new_info == info_dict:
2904                 return
2905             info_dict.clear()
2906             info_dict.update(new_info)
2907
2908         try:
2909             new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2910             replace_info_dict(new_info)
2911         except PostProcessingError as err:
2912             self.report_error('Preprocessing: %s' % str(err))
2913             return
2914
2915         if self.params.get('skip_download'):
2916             info_dict['filepath'] = temp_filename
2917             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2918             info_dict['__files_to_move'] = files_to_move
2919             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2920             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2921         else:
2922             # Download
2923             info_dict.setdefault('__postprocessors', [])
2924             try:
2925
2926                 def existing_video_file(*filepaths):
2927                     ext = info_dict.get('ext')
2928                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
2929                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
2930                                               default_overwrite=False)
2931                     if file:
2932                         info_dict['ext'] = os.path.splitext(file)[1][1:]
2933                     return file
2934
2935                 success = True
2936                 if info_dict.get('requested_formats') is not None:
2937
2938                     def compatible_formats(formats):
2939                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2940                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2941                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2942                         if len(video_formats) > 2 or len(audio_formats) > 2:
2943                             return False
2944
2945                         # Check extension
2946                         exts = set(format.get('ext') for format in formats)
2947                         COMPATIBLE_EXTS = (
2948                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2949                             set(('webm',)),
2950                         )
2951                         for ext_sets in COMPATIBLE_EXTS:
2952                             if ext_sets.issuperset(exts):
2953                                 return True
2954                         # TODO: Check acodec/vcodec
2955                         return False
2956
2957                     requested_formats = info_dict['requested_formats']
2958                     old_ext = info_dict['ext']
2959                     if self.params.get('merge_output_format') is None:
2960                         if not compatible_formats(requested_formats):
2961                             info_dict['ext'] = 'mkv'
2962                             self.report_warning(
2963                                 'Requested formats are incompatible for merge and will be merged into mkv')
2964                         if (info_dict['ext'] == 'webm'
2965                                 and info_dict.get('thumbnails')
2966                                 # check with type instead of pp_key, __name__, or isinstance
2967                                 # since we dont want any custom PPs to trigger this
2968                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2969                             info_dict['ext'] = 'mkv'
2970                             self.report_warning(
2971                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2972                     new_ext = info_dict['ext']
2973
2974                     def correct_ext(filename, ext=new_ext):
2975                         if filename == '-':
2976                             return filename
2977                         filename_real_ext = os.path.splitext(filename)[1][1:]
2978                         filename_wo_ext = (
2979                             os.path.splitext(filename)[0]
2980                             if filename_real_ext in (old_ext, new_ext)
2981                             else filename)
2982                         return '%s.%s' % (filename_wo_ext, ext)
2983
2984                     # Ensure filename always has a correct extension for successful merge
2985                     full_filename = correct_ext(full_filename)
2986                     temp_filename = correct_ext(temp_filename)
2987                     dl_filename = existing_video_file(full_filename, temp_filename)
2988                     info_dict['__real_download'] = False
2989
2990                     downloaded = []
2991                     merger = FFmpegMergerPP(self)
2992
2993                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
2994                     if dl_filename is not None:
2995                         self.report_file_already_downloaded(dl_filename)
2996                     elif fd:
2997                         for f in requested_formats if fd != FFmpegFD else []:
2998                             f['filepath'] = fname = prepend_extension(
2999                                 correct_ext(temp_filename, info_dict['ext']),
3000                                 'f%s' % f['format_id'], info_dict['ext'])
3001                             downloaded.append(fname)
3002                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3003                         success, real_download = self.dl(temp_filename, info_dict)
3004                         info_dict['__real_download'] = real_download
3005                     else:
3006                         if self.params.get('allow_unplayable_formats'):
3007                             self.report_warning(
3008                                 'You have requested merging of multiple formats '
3009                                 'while also allowing unplayable formats to be downloaded. '
3010                                 'The formats won\'t be merged to prevent data corruption.')
3011                         elif not merger.available:
3012                             self.report_warning(
3013                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
3014                                 'The formats won\'t be merged.')
3015
3016                         if temp_filename == '-':
3017                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3018                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3019                                       else 'but ffmpeg is not installed')
3020                             self.report_warning(
3021                                 f'You have requested downloading multiple formats to stdout {reason}. '
3022                                 'The formats will be streamed one after the other')
3023                             fname = temp_filename
3024                         for f in requested_formats:
3025                             new_info = dict(info_dict)
3026                             del new_info['requested_formats']
3027                             new_info.update(f)
3028                             if temp_filename != '-':
3029                                 fname = prepend_extension(
3030                                     correct_ext(temp_filename, new_info['ext']),
3031                                     'f%s' % f['format_id'], new_info['ext'])
3032                                 if not self._ensure_dir_exists(fname):
3033                                     return
3034                                 f['filepath'] = fname
3035                                 downloaded.append(fname)
3036                             partial_success, real_download = self.dl(fname, new_info)
3037                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3038                             success = success and partial_success
3039
3040                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3041                         info_dict['__postprocessors'].append(merger)
3042                         info_dict['__files_to_merge'] = downloaded
3043                         # Even if there were no downloads, it is being merged only now
3044                         info_dict['__real_download'] = True
3045                     else:
3046                         for file in downloaded:
3047                             files_to_move[file] = None
3048                 else:
3049                     # Just a single file
3050                     dl_filename = existing_video_file(full_filename, temp_filename)
3051                     if dl_filename is None or dl_filename == temp_filename:
3052                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3053                         # So we should try to resume the download
3054                         success, real_download = self.dl(temp_filename, info_dict)
3055                         info_dict['__real_download'] = real_download
3056                     else:
3057                         self.report_file_already_downloaded(dl_filename)
3058
3059                 dl_filename = dl_filename or temp_filename
3060                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3061
3062             except network_exceptions as err:
3063                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3064                 return
3065             except (OSError, IOError) as err:
3066                 raise UnavailableVideoError(err)
3067             except (ContentTooShortError, ) as err:
3068                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3069                 return
3070
3071             if success and full_filename != '-':
3072
3073                 def fixup():
3074                     do_fixup = True
3075                     fixup_policy = self.params.get('fixup')
3076                     vid = info_dict['id']
3077
3078                     if fixup_policy in ('ignore', 'never'):
3079                         return
3080                     elif fixup_policy == 'warn':
3081                         do_fixup = False
3082                     elif fixup_policy != 'force':
3083                         assert fixup_policy in ('detect_or_warn', None)
3084                         if not info_dict.get('__real_download'):
3085                             do_fixup = False
3086
3087                     def ffmpeg_fixup(cndn, msg, cls):
3088                         if not cndn:
3089                             return
3090                         if not do_fixup:
3091                             self.report_warning(f'{vid}: {msg}')
3092                             return
3093                         pp = cls(self)
3094                         if pp.available:
3095                             info_dict['__postprocessors'].append(pp)
3096                         else:
3097                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3098
3099                     stretched_ratio = info_dict.get('stretched_ratio')
3100                     ffmpeg_fixup(
3101                         stretched_ratio not in (1, None),
3102                         f'Non-uniform pixel ratio {stretched_ratio}',
3103                         FFmpegFixupStretchedPP)
3104
3105                     ffmpeg_fixup(
3106                         (info_dict.get('requested_formats') is None
3107                          and info_dict.get('container') == 'm4a_dash'
3108                          and info_dict.get('ext') == 'm4a'),
3109                         'writing DASH m4a. Only some players support this container',
3110                         FFmpegFixupM4aPP)
3111
3112                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3113                     downloader = downloader.__name__ if downloader else None
3114
3115                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3116                         ffmpeg_fixup(downloader == 'HlsFD',
3117                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3118                                      FFmpegFixupM3u8PP)
3119                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3120                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3121
3122                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3123                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3124
3125                 fixup()
3126                 try:
3127                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3128                 except PostProcessingError as err:
3129                     self.report_error('Postprocessing: %s' % str(err))
3130                     return
3131                 try:
3132                     for ph in self._post_hooks:
3133                         ph(info_dict['filepath'])
3134                 except Exception as err:
3135                     self.report_error('post hooks: %s' % str(err))
3136                     return
3137                 info_dict['__write_download_archive'] = True
3138
3139         if self.params.get('force_write_download_archive'):
3140             info_dict['__write_download_archive'] = True
3141
3142         # Make sure the info_dict was modified in-place
3143         assert info_dict is original_infodict
3144
3145         max_downloads = self.params.get('max_downloads')
3146         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3147             raise MaxDownloadsReached()
3148
3149     def __download_wrapper(self, func):
3150         @functools.wraps(func)
3151         def wrapper(*args, **kwargs):
3152             try:
3153                 res = func(*args, **kwargs)
3154             except UnavailableVideoError as e:
3155                 self.report_error(e)
3156             except MaxDownloadsReached as e:
3157                 self.to_screen(f'[info] {e}')
3158                 raise
3159             except DownloadCancelled as e:
3160                 self.to_screen(f'[info] {e}')
3161                 if not self.params.get('break_per_url'):
3162                     raise
3163             else:
3164                 if self.params.get('dump_single_json', False):
3165                     self.post_extract(res)
3166                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3167         return wrapper
3168
3169     def download(self, url_list):
3170         """Download a given list of URLs."""
3171         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3172         outtmpl = self.outtmpl_dict['default']
3173         if (len(url_list) > 1
3174                 and outtmpl != '-'
3175                 and '%' not in outtmpl
3176                 and self.params.get('max_downloads') != 1):
3177             raise SameFileError(outtmpl)
3178
3179         for url in url_list:
3180             self.__download_wrapper(self.extract_info)(
3181                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3182
3183         return self._download_retcode
3184
3185     def download_with_info_file(self, info_filename):
3186         with contextlib.closing(fileinput.FileInput(
3187                 [info_filename], mode='r',
3188                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3189             # FileInput doesn't have a read method, we can't call json.load
3190             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3191         try:
3192             self.__download_wrapper(self.process_ie_result)(info, download=True)
3193         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3194             if not isinstance(e, EntryNotInPlaylist):
3195                 self.to_stderr('\r')
3196             webpage_url = info.get('webpage_url')
3197             if webpage_url is not None:
3198                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3199                 return self.download([webpage_url])
3200             else:
3201                 raise
3202         return self._download_retcode
3203
3204     @staticmethod
3205     def sanitize_info(info_dict, remove_private_keys=False):
3206         ''' Sanitize the infodict for converting to json '''
3207         if info_dict is None:
3208             return info_dict
3209         info_dict.setdefault('epoch', int(time.time()))
3210         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3211         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3212         if remove_private_keys:
3213             remove_keys |= {
3214                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3215                 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3216             }
3217             reject = lambda k, v: k not in keep_keys and (
3218                 k.startswith('_') or k in remove_keys or v is None)
3219         else:
3220             reject = lambda k, v: k in remove_keys
3221
3222         def filter_fn(obj):
3223             if isinstance(obj, dict):
3224                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3225             elif isinstance(obj, (list, tuple, set, LazyList)):
3226                 return list(map(filter_fn, obj))
3227             elif obj is None or isinstance(obj, (str, int, float, bool)):
3228                 return obj
3229             else:
3230                 return repr(obj)
3231
3232         return filter_fn(info_dict)
3233
3234     @staticmethod
3235     def filter_requested_info(info_dict, actually_filter=True):
3236         ''' Alias of sanitize_info for backward compatibility '''
3237         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3238
3239     @staticmethod
3240     def post_extract(info_dict):
3241         def actual_post_extract(info_dict):
3242             if info_dict.get('_type') in ('playlist', 'multi_video'):
3243                 for video_dict in info_dict.get('entries', {}):
3244                     actual_post_extract(video_dict or {})
3245                 return
3246
3247             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3248             extra = post_extractor().items()
3249             info_dict.update(extra)
3250             info_dict.pop('__post_extractor', None)
3251
3252             original_infodict = info_dict.get('__original_infodict') or {}
3253             original_infodict.update(extra)
3254             original_infodict.pop('__post_extractor', None)
3255
3256         actual_post_extract(info_dict or {})
3257
3258     def run_pp(self, pp, infodict):
3259         files_to_delete = []
3260         if '__files_to_move' not in infodict:
3261             infodict['__files_to_move'] = {}
3262         try:
3263             files_to_delete, infodict = pp.run(infodict)
3264         except PostProcessingError as e:
3265             # Must be True and not 'only_download'
3266             if self.params.get('ignoreerrors') is True:
3267                 self.report_error(e)
3268                 return infodict
3269             raise
3270
3271         if not files_to_delete:
3272             return infodict
3273         if self.params.get('keepvideo', False):
3274             for f in files_to_delete:
3275                 infodict['__files_to_move'].setdefault(f, '')
3276         else:
3277             for old_filename in set(files_to_delete):
3278                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3279                 try:
3280                     os.remove(encodeFilename(old_filename))
3281                 except (IOError, OSError):
3282                     self.report_warning('Unable to remove downloaded original file')
3283                 if old_filename in infodict['__files_to_move']:
3284                     del infodict['__files_to_move'][old_filename]
3285         return infodict
3286
3287     def run_all_pps(self, key, info, *, additional_pps=None):
3288         for tmpl in self.params['forceprint'].get(key, []):
3289             self._forceprint(tmpl, info)
3290         for pp in (additional_pps or []) + self._pps[key]:
3291             info = self.run_pp(pp, info)
3292         return info
3293
3294     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3295         info = dict(ie_info)
3296         info['__files_to_move'] = files_to_move or {}
3297         info = self.run_all_pps(key, info)
3298         return info, info.pop('__files_to_move', None)
3299
3300     def post_process(self, filename, info, files_to_move=None):
3301         """Run all the postprocessors on the given file."""
3302         info['filepath'] = filename
3303         info['__files_to_move'] = files_to_move or {}
3304         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3305         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3306         del info['__files_to_move']
3307         return self.run_all_pps('after_move', info)
3308
3309     def _make_archive_id(self, info_dict):
3310         video_id = info_dict.get('id')
3311         if not video_id:
3312             return
3313         # Future-proof against any change in case
3314         # and backwards compatibility with prior versions
3315         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3316         if extractor is None:
3317             url = str_or_none(info_dict.get('url'))
3318             if not url:
3319                 return
3320             # Try to find matching extractor for the URL and take its ie_key
3321             for ie_key, ie in self._ies.items():
3322                 if ie.suitable(url):
3323                     extractor = ie_key
3324                     break
3325             else:
3326                 return
3327         return '%s %s' % (extractor.lower(), video_id)
3328
3329     def in_download_archive(self, info_dict):
3330         fn = self.params.get('download_archive')
3331         if fn is None:
3332             return False
3333
3334         vid_id = self._make_archive_id(info_dict)
3335         if not vid_id:
3336             return False  # Incomplete video information
3337
3338         return vid_id in self.archive
3339
3340     def record_download_archive(self, info_dict):
3341         fn = self.params.get('download_archive')
3342         if fn is None:
3343             return
3344         vid_id = self._make_archive_id(info_dict)
3345         assert vid_id
3346         self.write_debug(f'Adding to archive: {vid_id}')
3347         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3348             archive_file.write(vid_id + '\n')
3349         self.archive.add(vid_id)
3350
3351     @staticmethod
3352     def format_resolution(format, default='unknown'):
3353         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3354             return 'audio only'
3355         if format.get('resolution') is not None:
3356             return format['resolution']
3357         if format.get('width') and format.get('height'):
3358             return '%dx%d' % (format['width'], format['height'])
3359         elif format.get('height'):
3360             return '%sp' % format['height']
3361         elif format.get('width'):
3362             return '%dx?' % format['width']
3363         return default
3364
3365     def _list_format_headers(self, *headers):
3366         if self.params.get('listformats_table', True) is not False:
3367             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3368         return headers
3369
3370     def _format_note(self, fdict):
3371         res = ''
3372         if fdict.get('ext') in ['f4f', 'f4m']:
3373             res += '(unsupported)'
3374         if fdict.get('language'):
3375             if res:
3376                 res += ' '
3377             res += '[%s]' % fdict['language']
3378         if fdict.get('format_note') is not None:
3379             if res:
3380                 res += ' '
3381             res += fdict['format_note']
3382         if fdict.get('tbr') is not None:
3383             if res:
3384                 res += ', '
3385             res += '%4dk' % fdict['tbr']
3386         if fdict.get('container') is not None:
3387             if res:
3388                 res += ', '
3389             res += '%s container' % fdict['container']
3390         if (fdict.get('vcodec') is not None
3391                 and fdict.get('vcodec') != 'none'):
3392             if res:
3393                 res += ', '
3394             res += fdict['vcodec']
3395             if fdict.get('vbr') is not None:
3396                 res += '@'
3397         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3398             res += 'video@'
3399         if fdict.get('vbr') is not None:
3400             res += '%4dk' % fdict['vbr']
3401         if fdict.get('fps') is not None:
3402             if res:
3403                 res += ', '
3404             res += '%sfps' % fdict['fps']
3405         if fdict.get('acodec') is not None:
3406             if res:
3407                 res += ', '
3408             if fdict['acodec'] == 'none':
3409                 res += 'video only'
3410             else:
3411                 res += '%-5s' % fdict['acodec']
3412         elif fdict.get('abr') is not None:
3413             if res:
3414                 res += ', '
3415             res += 'audio'
3416         if fdict.get('abr') is not None:
3417             res += '@%3dk' % fdict['abr']
3418         if fdict.get('asr') is not None:
3419             res += ' (%5dHz)' % fdict['asr']
3420         if fdict.get('filesize') is not None:
3421             if res:
3422                 res += ', '
3423             res += format_bytes(fdict['filesize'])
3424         elif fdict.get('filesize_approx') is not None:
3425             if res:
3426                 res += ', '
3427             res += '~' + format_bytes(fdict['filesize_approx'])
3428         return res
3429
3430     def render_formats_table(self, info_dict):
3431         if not info_dict.get('formats') and not info_dict.get('url'):
3432             return None
3433
3434         formats = info_dict.get('formats', [info_dict])
3435         if not self.params.get('listformats_table', True) is not False:
3436             table = [
3437                 [
3438                     format_field(f, 'format_id'),
3439                     format_field(f, 'ext'),
3440                     self.format_resolution(f),
3441                     self._format_note(f)
3442                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3443             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3444
3445         delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3446         table = [
3447             [
3448                 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3449                 format_field(f, 'ext'),
3450                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3451                 format_field(f, 'fps', '\t%d'),
3452                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3453                 delim,
3454                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3455                 format_field(f, 'tbr', '\t%dk'),
3456                 shorten_protocol_name(f.get('protocol', '')),
3457                 delim,
3458                 format_field(f, 'vcodec', default='unknown').replace(
3459                     'none', 'images' if f.get('acodec') == 'none'
3460                             else self._format_screen('audio only', self.Styles.SUPPRESS)),
3461                 format_field(f, 'vbr', '\t%dk'),
3462                 format_field(f, 'acodec', default='unknown').replace(
3463                     'none', '' if f.get('vcodec') == 'none'
3464                             else self._format_screen('video only', self.Styles.SUPPRESS)),
3465                 format_field(f, 'abr', '\t%dk'),
3466                 format_field(f, 'asr', '\t%dHz'),
3467                 join_nonempty(
3468                     self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3469                     format_field(f, 'language', '[%s]'),
3470                     join_nonempty(format_field(f, 'format_note'),
3471                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3472                                   delim=', '),
3473                     delim=' '),
3474             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3475         header_line = self._list_format_headers(
3476             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3477             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3478
3479         return render_table(
3480             header_line, table, hide_empty=True,
3481             delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3482
3483     def render_thumbnails_table(self, info_dict):
3484         thumbnails = list(info_dict.get('thumbnails'))
3485         if not thumbnails:
3486             return None
3487         return render_table(
3488             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3489             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3490
3491     def render_subtitles_table(self, video_id, subtitles):
3492         def _row(lang, formats):
3493             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3494             if len(set(names)) == 1:
3495                 names = [] if names[0] == 'unknown' else names[:1]
3496             return [lang, ', '.join(names), ', '.join(exts)]
3497
3498         if not subtitles:
3499             return None
3500         return render_table(
3501             self._list_format_headers('Language', 'Name', 'Formats'),
3502             [_row(lang, formats) for lang, formats in subtitles.items()],
3503             hide_empty=True)
3504
3505     def __list_table(self, video_id, name, func, *args):
3506         table = func(*args)
3507         if not table:
3508             self.to_screen(f'{video_id} has no {name}')
3509             return
3510         self.to_screen(f'[info] Available {name} for {video_id}:')
3511         self.to_stdout(table)
3512
3513     def list_formats(self, info_dict):
3514         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3515
3516     def list_thumbnails(self, info_dict):
3517         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3518
3519     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3520         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3521
3522     def urlopen(self, req):
3523         """ Start an HTTP download """
3524         if isinstance(req, compat_basestring):
3525             req = sanitized_Request(req)
3526         return self._opener.open(req, timeout=self._socket_timeout)
3527
3528     def print_debug_header(self):
3529         if not self.params.get('verbose'):
3530             return
3531
3532         def get_encoding(stream):
3533             ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3534             if not supports_terminal_sequences(stream):
3535                 from .compat import WINDOWS_VT_MODE
3536                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3537             return ret
3538
3539         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3540             locale.getpreferredencoding(),
3541             sys.getfilesystemencoding(),
3542             get_encoding(self._screen_file), get_encoding(self._err_file),
3543             self.get_encoding())
3544
3545         logger = self.params.get('logger')
3546         if logger:
3547             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3548             write_debug(encoding_str)
3549         else:
3550             write_string(f'[debug] {encoding_str}\n', encoding=None)
3551             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3552
3553         source = detect_variant()
3554         write_debug(join_nonempty(
3555             'yt-dlp version', __version__,
3556             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3557             '' if source == 'unknown' else f'({source})',
3558             delim=' '))
3559         if not _LAZY_LOADER:
3560             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3561                 write_debug('Lazy loading extractors is forcibly disabled')
3562             else:
3563                 write_debug('Lazy loading extractors is disabled')
3564         if plugin_extractors or plugin_postprocessors:
3565             write_debug('Plugins: %s' % [
3566                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3567                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3568         if self.params.get('compat_opts'):
3569             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3570
3571         if source == 'source':
3572             try:
3573                 sp = Popen(
3574                     ['git', 'rev-parse', '--short', 'HEAD'],
3575                     stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3576                     cwd=os.path.dirname(os.path.abspath(__file__)))
3577                 out, err = sp.communicate_or_kill()
3578                 out = out.decode().strip()
3579                 if re.match('[0-9a-f]+', out):
3580                     write_debug('Git HEAD: %s' % out)
3581             except Exception:
3582                 try:
3583                     sys.exc_clear()
3584                 except Exception:
3585                     pass
3586
3587         def python_implementation():
3588             impl_name = platform.python_implementation()
3589             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3590                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3591             return impl_name
3592
3593         write_debug('Python version %s (%s %s) - %s' % (
3594             platform.python_version(),
3595             python_implementation(),
3596             platform.architecture()[0],
3597             platform_name()))
3598
3599         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3600         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3601         if ffmpeg_features:
3602             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3603
3604         exe_versions['rtmpdump'] = rtmpdump_version()
3605         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3606         exe_str = ', '.join(
3607             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3608         ) or 'none'
3609         write_debug('exe versions: %s' % exe_str)
3610
3611         from .downloader.websocket import has_websockets
3612         from .postprocessor.embedthumbnail import has_mutagen
3613         from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
3614
3615         lib_str = join_nonempty(
3616             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3617             SECRETSTORAGE_AVAILABLE and 'secretstorage',
3618             has_mutagen and 'mutagen',
3619             SQLITE_AVAILABLE and 'sqlite',
3620             has_websockets and 'websockets',
3621             delim=', ') or 'none'
3622         write_debug('Optional libraries: %s' % lib_str)
3623
3624         proxy_map = {}
3625         for handler in self._opener.handlers:
3626             if hasattr(handler, 'proxies'):
3627                 proxy_map.update(handler.proxies)
3628         write_debug(f'Proxy map: {proxy_map}')
3629
3630         # Not implemented
3631         if False and self.params.get('call_home'):
3632             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3633             write_debug('Public IP address: %s' % ipaddr)
3634             latest_version = self.urlopen(
3635                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3636             if version_tuple(latest_version) > version_tuple(__version__):
3637                 self.report_warning(
3638                     'You are using an outdated version (newest version: %s)! '
3639                     'See https://yt-dl.org/update if you need help updating.' %
3640                     latest_version)
3641
3642     def _setup_opener(self):
3643         timeout_val = self.params.get('socket_timeout')
3644         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3645
3646         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3647         opts_cookiefile = self.params.get('cookiefile')
3648         opts_proxy = self.params.get('proxy')
3649
3650         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3651
3652         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3653         if opts_proxy is not None:
3654             if opts_proxy == '':
3655                 proxies = {}
3656             else:
3657                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3658         else:
3659             proxies = compat_urllib_request.getproxies()
3660             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3661             if 'http' in proxies and 'https' not in proxies:
3662                 proxies['https'] = proxies['http']
3663         proxy_handler = PerRequestProxyHandler(proxies)
3664
3665         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3666         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3667         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3668         redirect_handler = YoutubeDLRedirectHandler()
3669         data_handler = compat_urllib_request_DataHandler()
3670
3671         # When passing our own FileHandler instance, build_opener won't add the
3672         # default FileHandler and allows us to disable the file protocol, which
3673         # can be used for malicious purposes (see
3674         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3675         file_handler = compat_urllib_request.FileHandler()
3676
3677         def file_open(*args, **kwargs):
3678             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3679         file_handler.file_open = file_open
3680
3681         opener = compat_urllib_request.build_opener(
3682             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3683
3684         # Delete the default user-agent header, which would otherwise apply in
3685         # cases where our custom HTTP handler doesn't come into play
3686         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3687         opener.addheaders = []
3688         self._opener = opener
3689
3690     def encode(self, s):
3691         if isinstance(s, bytes):
3692             return s  # Already encoded
3693
3694         try:
3695             return s.encode(self.get_encoding())
3696         except UnicodeEncodeError as err:
3697             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3698             raise
3699
3700     def get_encoding(self):
3701         encoding = self.params.get('encoding')
3702         if encoding is None:
3703             encoding = preferredencoding()
3704         return encoding
3705
3706     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3707         ''' Write infojson and returns True = written, False = skip, None = error '''
3708         if overwrite is None:
3709             overwrite = self.params.get('overwrites', True)
3710         if not self.params.get('writeinfojson'):
3711             return False
3712         elif not infofn:
3713             self.write_debug(f'Skipping writing {label} infojson')
3714             return False
3715         elif not self._ensure_dir_exists(infofn):
3716             return None
3717         elif not overwrite and os.path.exists(infofn):
3718             self.to_screen(f'[info] {label.title()} metadata is already present')
3719         else:
3720             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3721             try:
3722                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3723             except (OSError, IOError):
3724                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3725                 return None
3726         return True
3727
3728     def _write_description(self, label, ie_result, descfn):
3729         ''' Write description and returns True = written, False = skip, None = error '''
3730         if not self.params.get('writedescription'):
3731             return False
3732         elif not descfn:
3733             self.write_debug(f'Skipping writing {label} description')
3734             return False
3735         elif not self._ensure_dir_exists(descfn):
3736             return None
3737         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3738             self.to_screen(f'[info] {label.title()} description is already present')
3739         elif ie_result.get('description') is None:
3740             self.report_warning(f'There\'s no {label} description to write')
3741             return False
3742         else:
3743             try:
3744                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3745                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3746                     descfile.write(ie_result['description'])
3747             except (OSError, IOError):
3748                 self.report_error(f'Cannot write {label} description file {descfn}')
3749                 return None
3750         return True
3751
3752     def _write_subtitles(self, info_dict, filename):
3753         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3754         ret = []
3755         subtitles = info_dict.get('requested_subtitles')
3756         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3757             # subtitles download errors are already managed as troubles in relevant IE
3758             # that way it will silently go on when used with unsupporting IE
3759             return ret
3760
3761         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3762         if not sub_filename_base:
3763             self.to_screen('[info] Skipping writing video subtitles')
3764             return ret
3765         for sub_lang, sub_info in subtitles.items():
3766             sub_format = sub_info['ext']
3767             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3768             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3769             existing_sub = self.existing_file((sub_filename_final, sub_filename))
3770             if existing_sub:
3771                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3772                 sub_info['filepath'] = existing_sub
3773                 ret.append((existing_sub, sub_filename_final))
3774                 continue
3775
3776             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3777             if sub_info.get('data') is not None:
3778                 try:
3779                     # Use newline='' to prevent conversion of newline characters
3780                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3781                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3782                         subfile.write(sub_info['data'])
3783                     sub_info['filepath'] = sub_filename
3784                     ret.append((sub_filename, sub_filename_final))
3785                     continue
3786                 except (OSError, IOError):
3787                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3788                     return None
3789
3790             try:
3791                 sub_copy = sub_info.copy()
3792                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3793                 self.dl(sub_filename, sub_copy, subtitle=True)
3794                 sub_info['filepath'] = sub_filename
3795                 ret.append((sub_filename, sub_filename_final))
3796             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3797                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3798                     raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
3799                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3800         return ret
3801
3802     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3803         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3804         write_all = self.params.get('write_all_thumbnails', False)
3805         thumbnails, ret = [], []
3806         if write_all or self.params.get('writethumbnail', False):
3807             thumbnails = info_dict.get('thumbnails') or []
3808         multiple = write_all and len(thumbnails) > 1
3809
3810         if thumb_filename_base is None:
3811             thumb_filename_base = filename
3812         if thumbnails and not thumb_filename_base:
3813             self.write_debug(f'Skipping writing {label} thumbnail')
3814             return ret
3815
3816         for idx, t in list(enumerate(thumbnails))[::-1]:
3817             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3818             thumb_display_id = f'{label} thumbnail {t["id"]}'
3819             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3820             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3821
3822             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3823             if existing_thumb:
3824                 self.to_screen('[info] %s is already present' % (
3825                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3826                 t['filepath'] = existing_thumb
3827                 ret.append((existing_thumb, thumb_filename_final))
3828             else:
3829                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3830                 try:
3831                     uf = self.urlopen(t['url'])
3832                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3833                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3834                         shutil.copyfileobj(uf, thumbf)
3835                     ret.append((thumb_filename, thumb_filename_final))
3836                     t['filepath'] = thumb_filename
3837                 except network_exceptions as err:
3838                     thumbnails.pop(idx)
3839                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3840             if ret and not write_all:
3841                 break
3842         return ret