yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_pycrypto_AES,
  40     compat_shlex_quote,
  41     compat_str,
  42     compat_tokenize_tokenize,
  43     compat_urllib_error,
  44     compat_urllib_request,
  45     compat_urllib_request_DataHandler,
  46     windows_enable_vt_mode,
  47 )
  48 from .cookies import load_cookies
  49 from .utils import (
  50     age_restricted,
  51     args_to_str,
  52     ContentTooShortError,
  53     date_from_str,
  54     DateRange,
  55     DEFAULT_OUTTMPL,
  56     determine_ext,
  57     determine_protocol,
  58     DownloadCancelled,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     format_decimal_suffix,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     get_domain,
  74     HEADRequest,
  75     int_or_none,
  76     iri_to_uri,
  77     ISO3166Utils,
  78     join_nonempty,
  79     LazyList,
  80     LINK_TEMPLATES,
  81     locked_file,
  82     make_dir,
  83     make_HTTPS_handler,
  84     MaxDownloadsReached,
  85     network_exceptions,
  86     number_of_digits,
  87     orderedSet,
  88     OUTTMPL_TYPES,
  89     PagedList,
  90     parse_filesize,
  91     PerRequestProxyHandler,
  92     platform_name,
  93     Popen,
  94     POSTPROCESS_WHEN,
  95     PostProcessingError,
  96     preferredencoding,
  97     prepend_extension,
  98     ReExtractInfo,
  99     register_socks_protocols,
 100     RejectedVideoReached,
 101     remove_terminal_sequences,
 102     render_table,
 103     replace_extension,
 104     SameFileError,
 105     sanitize_filename,
 106     sanitize_path,
 107     sanitize_url,
 108     sanitized_Request,
 109     std_headers,
 110     STR_FORMAT_RE_TMPL,
 111     STR_FORMAT_TYPES,
 112     str_or_none,
 113     strftime_or_none,
 114     subtitles_filename,
 115     supports_terminal_sequences,
 116     timetuple_from_msec,
 117     to_high_limit_path,
 118     traverse_obj,
 119     try_get,
 120     UnavailableVideoError,
 121     url_basename,
 122     variadic,
 123     version_tuple,
 124     write_json_file,
 125     write_string,
 126     YoutubeDLCookieProcessor,
 127     YoutubeDLHandler,
 128     YoutubeDLRedirectHandler,
 129 )
 130 from .cache import Cache
 131 from .minicurses import format_text
 132 from .extractor import (
 133     gen_extractor_classes,
 134     get_info_extractor,
 135     _LAZY_LOADER,
 136     _PLUGIN_CLASSES as plugin_extractors
 137 )
 138 from .extractor.openload import PhantomJSwrapper
 139 from .downloader import (
 140     FFmpegFD,
 141     get_suitable_downloader,
 142     shorten_protocol_name
 143 )
 144 from .downloader.rtmp import rtmpdump_version
 145 from .postprocessor import (
 146     get_postprocessor,
 147     EmbedThumbnailPP,
 148     FFmpegFixupDuplicateMoovPP,
 149     FFmpegFixupDurationPP,
 150     FFmpegFixupM3u8PP,
 151     FFmpegFixupM4aPP,
 152     FFmpegFixupStretchedPP,
 153     FFmpegFixupTimestampPP,
 154     FFmpegMergerPP,
 155     FFmpegPostProcessor,
 156     MoveFilesAfterDownloadPP,
 157     _PLUGIN_CLASSES as plugin_postprocessors
 158 )
 159 from .update import detect_variant
 160 from .version import __version__, RELEASE_GIT_HEAD
 161
 162 if compat_os_name == 'nt':
 163     import ctypes
 164
 165
 166 class YoutubeDL(object):
 167     """YoutubeDL class.
 168
 169     YoutubeDL objects are the ones responsible of downloading the
 170     actual video file and writing it to disk if the user has requested
 171     it, among some other tasks. In most cases there should be one per
 172     program. As, given a video URL, the downloader doesn't know how to
 173     extract all the needed information, task that InfoExtractors do, it
 174     has to pass the URL to one of them.
 175
 176     For this, YoutubeDL objects have a method that allows
 177     InfoExtractors to be registered in a given order. When it is passed
 178     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 179     finds that reports being able to handle it. The InfoExtractor extracts
 180     all the information about the video or videos the URL refers to, and
 181     YoutubeDL process the extracted information, possibly using a File
 182     Downloader to download the video.
 183
 184     YoutubeDL objects accept a lot of parameters. In order not to saturate
 185     the object constructor with arguments, it receives a dictionary of
 186     options instead. These options are available through the params
 187     attribute for the InfoExtractors to use. The YoutubeDL also
 188     registers itself as the downloader in charge for the InfoExtractors
 189     that are added to it, so this is a "mutual registration".
 190
 191     Available options:
 192
 193     username:          Username for authentication purposes.
 194     password:          Password for authentication purposes.
 195     videopassword:     Password for accessing a video.
 196     ap_mso:            Adobe Pass multiple-system operator identifier.
 197     ap_username:       Multiple-system operator account username.
 198     ap_password:       Multiple-system operator account password.
 199     usenetrc:          Use netrc for authentication instead.
 200     verbose:           Print additional info to stdout.
 201     quiet:             Do not print messages to stdout.
 202     no_warnings:       Do not print out anything for warnings.
 203     forceprint:        A dict with keys video/playlist mapped to
 204                        a list of templates to force print to stdout
 205                        For compatibility, a single list is also accepted
 206     forceurl:          Force printing final URL. (Deprecated)
 207     forcetitle:        Force printing title. (Deprecated)
 208     forceid:           Force printing ID. (Deprecated)
 209     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 210     forcedescription:  Force printing description. (Deprecated)
 211     forcefilename:     Force printing final filename. (Deprecated)
 212     forceduration:     Force printing duration. (Deprecated)
 213     forcejson:         Force printing info_dict as JSON.
 214     dump_single_json:  Force printing the info_dict of the whole playlist
 215                        (or video) as a single JSON line.
 216     force_write_download_archive: Force writing download archive regardless
 217                        of 'skip_download' or 'simulate'.
 218     simulate:          Do not download the video files. If unset (or None),
 219                        simulate only if listsubtitles, listformats or list_thumbnails is used
 220     format:            Video format code. see "FORMAT SELECTION" for more details.
 221                        You can also pass a function. The function takes 'ctx' as
 222                        argument and returns the formats to download.
 223                        See "build_format_selector" for an implementation
 224     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 225     ignore_no_formats_error: Ignore "No video formats" error. Usefull for
 226                        extracting metadata even if the video is not actually
 227                        available for download (experimental)
 228     format_sort:       A list of fields by which to sort the video formats.
 229                        See "Sorting Formats" for more details.
 230     format_sort_force: Force the given format_sort. see "Sorting Formats"
 231                        for more details.
 232     allow_multiple_video_streams:   Allow multiple video streams to be merged
 233                        into a single file
 234     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 235                        into a single file
 236     check_formats      Whether to test if the formats are downloadable.
 237                        Can be True (check all), False (check none),
 238                        'selected' (check selected formats),
 239                        or None (check only if requested by extractor)
 240     paths:             Dictionary of output paths. The allowed keys are 'home'
 241                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 242     outtmpl:           Dictionary of templates for output names. Allowed keys
 243                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 244                        For compatibility with youtube-dl, a single string can also be used
 245     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 246     restrictfilenames: Do not allow "&" and spaces in file names
 247     trim_file_name:    Limit length of filename (extension excluded)
 248     windowsfilenames:  Force the filenames to be windows compatible
 249     ignoreerrors:      Do not stop on download/postprocessing errors.
 250                        Can be 'only_download' to ignore only download errors.
 251                        Default is 'only_download' for CLI, but False for API
 252     skip_playlist_after_errors: Number of allowed failures until the rest of
 253                        the playlist is skipped
 254     force_generic_extractor: Force downloader to use the generic extractor
 255     overwrites:        Overwrite all video and metadata files if True,
 256                        overwrite only non-video files if None
 257                        and don't overwrite any file if False
 258                        For compatibility with youtube-dl,
 259                        "nooverwrites" may also be used instead
 260     playliststart:     Playlist item to start at.
 261     playlistend:       Playlist item to end at.
 262     playlist_items:    Specific indices of playlist to download.
 263     playlistreverse:   Download playlist items in reverse order.
 264     playlistrandom:    Download playlist items in random order.
 265     matchtitle:        Download only matching titles.
 266     rejecttitle:       Reject downloads for matching titles.
 267     logger:            Log messages to a logging.Logger instance.
 268     logtostderr:       Log messages to stderr instead of stdout.
 269     consoletitle:       Display progress in console window's titlebar.
 270     writedescription:  Write the video description to a .description file
 271     writeinfojson:     Write the video description to a .info.json file
 272     clean_infojson:    Remove private fields from the infojson
 273     getcomments:       Extract video comments. This will not be written to disk
 274                        unless writeinfojson is also given
 275     writeannotations:  Write the video annotations to a .annotations.xml file
 276     writethumbnail:    Write the thumbnail image to a file
 277     allow_playlist_files: Whether to write playlists' description, infojson etc
 278                        also to disk when using the 'write*' options
 279     write_all_thumbnails:  Write all thumbnail formats to files
 280     writelink:         Write an internet shortcut file, depending on the
 281                        current platform (.url/.webloc/.desktop)
 282     writeurllink:      Write a Windows internet shortcut file (.url)
 283     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 284     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 285     writesubtitles:    Write the video subtitles to a file
 286     writeautomaticsub: Write the automatically generated subtitles to a file
 287     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 288                        Downloads all the subtitles of the video
 289                        (requires writesubtitles or writeautomaticsub)
 290     listsubtitles:     Lists all available subtitles for the video
 291     subtitlesformat:   The format code for subtitles
 292     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 293                        The list may contain "all" to refer to all the available
 294                        subtitles. The language can be prefixed with a "-" to
 295                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 296     keepvideo:         Keep the video file after post-processing
 297     daterange:         A DateRange object, download only if the upload_date is in the range.
 298     skip_download:     Skip the actual download of the video file
 299     cachedir:          Location of the cache files in the filesystem.
 300                        False to disable filesystem cache.
 301     noplaylist:        Download single video instead of a playlist if in doubt.
 302     age_limit:         An integer representing the user's age in years.
 303                        Unsuitable videos for the given age are skipped.
 304     min_views:         An integer representing the minimum view count the video
 305                        must have in order to not be skipped.
 306                        Videos without view count information are always
 307                        downloaded. None for no limit.
 308     max_views:         An integer representing the maximum view count.
 309                        Videos that are more popular than that are not
 310                        downloaded.
 311                        Videos without view count information are always
 312                        downloaded. None for no limit.
 313     download_archive:  File name of a file where all downloads are recorded.
 314                        Videos already present in the file are not downloaded
 315                        again.
 316     break_on_existing: Stop the download process after attempting to download a
 317                        file that is in the archive.
 318     break_on_reject:   Stop the download process when encountering a video that
 319                        has been filtered out.
 320     break_per_url:     Whether break_on_reject and break_on_existing
 321                        should act on each input URL as opposed to for the entire queue
 322     cookiefile:        File name where cookies should be read from and dumped to
 323     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 324                        name/pathfrom where cookies are loaded, and the name of the
 325                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 326     nocheckcertificate:  Do not verify SSL certificates
 327     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 328                        At the moment, this is only supported by YouTube.
 329     proxy:             URL of the proxy server to use
 330     geo_verification_proxy:  URL of the proxy to use for IP address verification
 331                        on geo-restricted sites.
 332     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 333     bidi_workaround:   Work around buggy terminals without bidirectional text
 334                        support, using fridibi
 335     debug_printtraffic:Print out sent and received HTTP traffic
 336     include_ads:       Download ads as well (deprecated)
 337     default_search:    Prepend this string if an input url is not valid.
 338                        'auto' for elaborate guessing
 339     encoding:          Use this encoding instead of the system-specified.
 340     extract_flat:      Do not resolve URLs, return the immediate result.
 341                        Pass in 'in_playlist' to only show this behavior for
 342                        playlist items.
 343     wait_for_video:    If given, wait for scheduled streams to become available.
 344                        The value should be a tuple containing the range
 345                        (min_secs, max_secs) to wait between retries
 346     postprocessors:    A list of dictionaries, each with an entry
 347                        * key:  The name of the postprocessor. See
 348                                yt_dlp/postprocessor/__init__.py for a list.
 349                        * when: When to run the postprocessor. Can be one of
 350                                pre_process|before_dl|post_process|after_move.
 351                                Assumed to be 'post_process' if not given
 352     post_hooks:        Deprecated - Register a custom postprocessor instead
 353                        A list of functions that get called as the final step
 354                        for each video file, after all postprocessors have been
 355                        called. The filename will be passed as the only argument.
 356     progress_hooks:    A list of functions that get called on download
 357                        progress, with a dictionary with the entries
 358                        * status: One of "downloading", "error", or "finished".
 359                                  Check this first and ignore unknown values.
 360                        * info_dict: The extracted info_dict
 361
 362                        If status is one of "downloading", or "finished", the
 363                        following properties may also be present:
 364                        * filename: The final filename (always present)
 365                        * tmpfilename: The filename we're currently writing to
 366                        * downloaded_bytes: Bytes on disk
 367                        * total_bytes: Size of the whole file, None if unknown
 368                        * total_bytes_estimate: Guess of the eventual file size,
 369                                                None if unavailable.
 370                        * elapsed: The number of seconds since download started.
 371                        * eta: The estimated time in seconds, None if unknown
 372                        * speed: The download speed in bytes/second, None if
 373                                 unknown
 374                        * fragment_index: The counter of the currently
 375                                          downloaded video fragment.
 376                        * fragment_count: The number of fragments (= individual
 377                                          files that will be merged)
 378
 379                        Progress hooks are guaranteed to be called at least once
 380                        (with status "finished") if the download is successful.
 381     postprocessor_hooks:  A list of functions that get called on postprocessing
 382                        progress, with a dictionary with the entries
 383                        * status: One of "started", "processing", or "finished".
 384                                  Check this first and ignore unknown values.
 385                        * postprocessor: Name of the postprocessor
 386                        * info_dict: The extracted info_dict
 387
 388                        Progress hooks are guaranteed to be called at least twice
 389                        (with status "started" and "finished") if the processing is successful.
 390     merge_output_format: Extension to use when merging formats.
 391     final_ext:         Expected final extension; used to detect when the file was
 392                        already downloaded and converted
 393     fixup:             Automatically correct known faults of the file.
 394                        One of:
 395                        - "never": do nothing
 396                        - "warn": only emit a warning
 397                        - "detect_or_warn": check whether we can do anything
 398                                            about it, warn otherwise (default)
 399     source_address:    Client-side IP address to bind to.
 400     call_home:         Boolean, true iff we are allowed to contact the
 401                        yt-dlp servers for debugging. (BROKEN)
 402     sleep_interval_requests: Number of seconds to sleep between requests
 403                        during extraction
 404     sleep_interval:    Number of seconds to sleep before each download when
 405                        used alone or a lower bound of a range for randomized
 406                        sleep before each download (minimum possible number
 407                        of seconds to sleep) when used along with
 408                        max_sleep_interval.
 409     max_sleep_interval:Upper bound of a range for randomized sleep before each
 410                        download (maximum possible number of seconds to sleep).
 411                        Must only be used along with sleep_interval.
 412                        Actual sleep time will be a random float from range
 413                        [sleep_interval; max_sleep_interval].
 414     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 415     listformats:       Print an overview of available video formats and exit.
 416     list_thumbnails:   Print a table of all thumbnails and exit.
 417     match_filter:      A function that gets called with the info_dict of
 418                        every video.
 419                        If it returns a message, the video is ignored.
 420                        If it returns None, the video is downloaded.
 421                        match_filter_func in utils.py is one example for this.
 422     no_color:          Do not emit color codes in output.
 423     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 424                        HTTP header
 425     geo_bypass_country:
 426                        Two-letter ISO 3166-2 country code that will be used for
 427                        explicit geographic restriction bypassing via faking
 428                        X-Forwarded-For HTTP header
 429     geo_bypass_ip_block:
 430                        IP range in CIDR notation that will be used similarly to
 431                        geo_bypass_country
 432
 433     The following options determine which downloader is picked:
 434     external_downloader: A dictionary of protocol keys and the executable of the
 435                        external downloader to use for it. The allowed protocols
 436                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 437                        Set the value to 'native' to use the native downloader
 438     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 439                        or {'m3u8': 'ffmpeg'} instead.
 440                        Use the native HLS downloader instead of ffmpeg/avconv
 441                        if True, otherwise use ffmpeg/avconv if False, otherwise
 442                        use downloader suggested by extractor if None.
 443     compat_opts:       Compatibility options. See "Differences in default behavior".
 444                        The following options do not work when used through the API:
 445                        filename, abort-on-error, multistreams, no-live-chat, format-sort
 446                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 447                        Refer __init__.py for their implementation
 448     progress_template: Dictionary of templates for progress outputs.
 449                        Allowed keys are 'download', 'postprocess',
 450                        'download-title' (console title) and 'postprocess-title'.
 451                        The template is mapped on a dictionary with keys 'progress' and 'info'
 452
 453     The following parameters are not used by YoutubeDL itself, they are used by
 454     the downloader (see yt_dlp/downloader/common.py):
 455     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 456     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 457     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 458     external_downloader_args, concurrent_fragment_downloads.
 459
 460     The following options are used by the post processors:
 461     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 462                        otherwise prefer ffmpeg. (avconv support is deprecated)
 463     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 464                        to the binary or its containing directory.
 465     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 466                        and a list of additional command-line arguments for the
 467                        postprocessor/executable. The dict can also have "PP+EXE" keys
 468                        which are used when the given exe is used by the given PP.
 469                        Use 'default' as the name for arguments to passed to all PP
 470                        For compatibility with youtube-dl, a single list of args
 471                        can also be used
 472
 473     The following options are used by the extractors:
 474     extractor_retries: Number of times to retry for known errors
 475     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 476     hls_split_discontinuity: Split HLS playlists to different formats at
 477                        discontinuities such as ad breaks (default: False)
 478     extractor_args:    A dictionary of arguments to be passed to the extractors.
 479                        See "EXTRACTOR ARGUMENTS" for details.
 480                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 481     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 482                        If True (default), DASH manifests and related
 483                        data will be downloaded and processed by extractor.
 484                        You can reduce network I/O by disabling it if you don't
 485                        care about DASH. (only for youtube)
 486     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 487                        If True (default), HLS manifests and related
 488                        data will be downloaded and processed by extractor.
 489                        You can reduce network I/O by disabling it if you don't
 490                        care about HLS. (only for youtube)
 491     """
 492
 493     _NUMERIC_FIELDS = set((
 494         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 495         'timestamp', 'release_timestamp',
 496         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 497         'average_rating', 'comment_count', 'age_limit',
 498         'start_time', 'end_time',
 499         'chapter_number', 'season_number', 'episode_number',
 500         'track_number', 'disc_number', 'release_year',
 501     ))
 502
 503     _format_selection_exts = {
 504         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 505         'video': {'mp4', 'flv', 'webm', '3gp'},
 506         'storyboards': {'mhtml'},
 507     }
 508
 509     params = None
 510     _ies = {}
 511     _pps = {k: [] for k in POSTPROCESS_WHEN}
 512     _printed_messages = set()
 513     _first_webpage_request = True
 514     _download_retcode = None
 515     _num_downloads = None
 516     _playlist_level = 0
 517     _playlist_urls = set()
 518     _screen_file = None
 519
 520     def __init__(self, params=None, auto_init=True):
 521         """Create a FileDownloader object with the given options.
 522         @param auto_init    Whether to load the default extractors and print header (if verbose).
 523                             Set to 'no_verbose_header' to not print the header
 524         """
 525         if params is None:
 526             params = {}
 527         self._ies = {}
 528         self._ies_instances = {}
 529         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 530         self._printed_messages = set()
 531         self._first_webpage_request = True
 532         self._post_hooks = []
 533         self._progress_hooks = []
 534         self._postprocessor_hooks = []
 535         self._download_retcode = 0
 536         self._num_downloads = 0
 537         self._num_videos = 0
 538         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 539         self._err_file = sys.stderr
 540         self.params = params
 541         self.cache = Cache(self)
 542
 543         windows_enable_vt_mode()
 544         self._allow_colors = {
 545             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 546             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 547         }
 548
 549         if sys.version_info < (3, 6):
 550             self.report_warning(
 551                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 552
 553         if self.params.get('allow_unplayable_formats'):
 554             self.report_warning(
 555                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 556                 'This is a developer option intended for debugging. \n'
 557                 '         If you experience any issues while using this option, '
 558                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 559
 560         def check_deprecated(param, option, suggestion):
 561             if self.params.get(param) is not None:
 562                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 563                 return True
 564             return False
 565
 566         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 567             if self.params.get('geo_verification_proxy') is None:
 568                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 569
 570         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 571         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 572         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 573
 574         for msg in self.params.get('_warnings', []):
 575             self.report_warning(msg)
 576         for msg in self.params.get('_deprecation_warnings', []):
 577             self.deprecation_warning(msg)
 578
 579         if 'list-formats' in self.params.get('compat_opts', []):
 580             self.params['listformats_table'] = False
 581
 582         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 583             # nooverwrites was unnecessarily changed to overwrites
 584             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 585             # This ensures compatibility with both keys
 586             self.params['overwrites'] = not self.params['nooverwrites']
 587         elif self.params.get('overwrites') is None:
 588             self.params.pop('overwrites', None)
 589         else:
 590             self.params['nooverwrites'] = not self.params['overwrites']
 591
 592         # Compatibility with older syntax
 593         params.setdefault('forceprint', {})
 594         if not isinstance(params['forceprint'], dict):
 595             params['forceprint'] = {'video': params['forceprint']}
 596
 597         if params.get('bidi_workaround', False):
 598             try:
 599                 import pty
 600                 master, slave = pty.openpty()
 601                 width = compat_get_terminal_size().columns
 602                 if width is None:
 603                     width_args = []
 604                 else:
 605                     width_args = ['-w', str(width)]
 606                 sp_kwargs = dict(
 607                     stdin=subprocess.PIPE,
 608                     stdout=slave,
 609                     stderr=self._err_file)
 610                 try:
 611                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 612                 except OSError:
 613                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 614                 self._output_channel = os.fdopen(master, 'rb')
 615             except OSError as ose:
 616                 if ose.errno == errno.ENOENT:
 617                     self.report_warning(
 618                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 619                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 620                 else:
 621                     raise
 622
 623         if (sys.platform != 'win32'
 624                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 625                 and not params.get('restrictfilenames', False)):
 626             # Unicode filesystem API will throw errors (#1474, #13027)
 627             self.report_warning(
 628                 'Assuming --restrict-filenames since file system encoding '
 629                 'cannot encode all characters. '
 630                 'Set the LC_ALL environment variable to fix this.')
 631             self.params['restrictfilenames'] = True
 632
 633         self.outtmpl_dict = self.parse_outtmpl()
 634
 635         # Creating format selector here allows us to catch syntax errors before the extraction
 636         self.format_selector = (
 637             self.params.get('format') if self.params.get('format') in (None, '-')
 638             else self.params['format'] if callable(self.params['format'])
 639             else self.build_format_selector(self.params['format']))
 640
 641         self._setup_opener()
 642
 643         if auto_init:
 644             if auto_init != 'no_verbose_header':
 645                 self.print_debug_header()
 646             self.add_default_info_extractors()
 647
 648         hooks = {
 649             'post_hooks': self.add_post_hook,
 650             'progress_hooks': self.add_progress_hook,
 651             'postprocessor_hooks': self.add_postprocessor_hook,
 652         }
 653         for opt, fn in hooks.items():
 654             for ph in self.params.get(opt, []):
 655                 fn(ph)
 656
 657         for pp_def_raw in self.params.get('postprocessors', []):
 658             pp_def = dict(pp_def_raw)
 659             when = pp_def.pop('when', 'post_process')
 660             self.add_post_processor(
 661                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 662                 when=when)
 663
 664         register_socks_protocols()
 665
 666         def preload_download_archive(fn):
 667             """Preload the archive, if any is specified"""
 668             if fn is None:
 669                 return False
 670             self.write_debug(f'Loading archive file {fn!r}')
 671             try:
 672                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 673                     for line in archive_file:
 674                         self.archive.add(line.strip())
 675             except IOError as ioe:
 676                 if ioe.errno != errno.ENOENT:
 677                     raise
 678                 return False
 679             return True
 680
 681         self.archive = set()
 682         preload_download_archive(self.params.get('download_archive'))
 683
 684     def warn_if_short_id(self, argv):
 685         # short YouTube ID starting with dash?
 686         idxs = [
 687             i for i, a in enumerate(argv)
 688             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 689         if idxs:
 690             correct_argv = (
 691                 ['yt-dlp']
 692                 + [a for i, a in enumerate(argv) if i not in idxs]
 693                 + ['--'] + [argv[i] for i in idxs]
 694             )
 695             self.report_warning(
 696                 'Long argument string detected. '
 697                 'Use -- to separate parameters and URLs, like this:\n%s' %
 698                 args_to_str(correct_argv))
 699
 700     def add_info_extractor(self, ie):
 701         """Add an InfoExtractor object to the end of the list."""
 702         ie_key = ie.ie_key()
 703         self._ies[ie_key] = ie
 704         if not isinstance(ie, type):
 705             self._ies_instances[ie_key] = ie
 706             ie.set_downloader(self)
 707
 708     def _get_info_extractor_class(self, ie_key):
 709         ie = self._ies.get(ie_key)
 710         if ie is None:
 711             ie = get_info_extractor(ie_key)
 712             self.add_info_extractor(ie)
 713         return ie
 714
 715     def get_info_extractor(self, ie_key):
 716         """
 717         Get an instance of an IE with name ie_key, it will try to get one from
 718         the _ies list, if there's no instance it will create a new one and add
 719         it to the extractor list.
 720         """
 721         ie = self._ies_instances.get(ie_key)
 722         if ie is None:
 723             ie = get_info_extractor(ie_key)()
 724             self.add_info_extractor(ie)
 725         return ie
 726
 727     def add_default_info_extractors(self):
 728         """
 729         Add the InfoExtractors returned by gen_extractors to the end of the list
 730         """
 731         for ie in gen_extractor_classes():
 732             self.add_info_extractor(ie)
 733
 734     def add_post_processor(self, pp, when='post_process'):
 735         """Add a PostProcessor object to the end of the chain."""
 736         self._pps[when].append(pp)
 737         pp.set_downloader(self)
 738
 739     def add_post_hook(self, ph):
 740         """Add the post hook"""
 741         self._post_hooks.append(ph)
 742
 743     def add_progress_hook(self, ph):
 744         """Add the download progress hook"""
 745         self._progress_hooks.append(ph)
 746
 747     def add_postprocessor_hook(self, ph):
 748         """Add the postprocessing progress hook"""
 749         self._postprocessor_hooks.append(ph)
 750         for pps in self._pps.values():
 751             for pp in pps:
 752                 pp.add_progress_hook(ph)
 753
 754     def _bidi_workaround(self, message):
 755         if not hasattr(self, '_output_channel'):
 756             return message
 757
 758         assert hasattr(self, '_output_process')
 759         assert isinstance(message, compat_str)
 760         line_count = message.count('\n') + 1
 761         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 762         self._output_process.stdin.flush()
 763         res = ''.join(self._output_channel.readline().decode('utf-8')
 764                       for _ in range(line_count))
 765         return res[:-len('\n')]
 766
 767     def _write_string(self, message, out=None, only_once=False):
 768         if only_once:
 769             if message in self._printed_messages:
 770                 return
 771             self._printed_messages.add(message)
 772         write_string(message, out=out, encoding=self.params.get('encoding'))
 773
 774     def to_stdout(self, message, skip_eol=False, quiet=False):
 775         """Print message to stdout"""
 776         if self.params.get('logger'):
 777             self.params['logger'].debug(message)
 778         elif not quiet or self.params.get('verbose'):
 779             self._write_string(
 780                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 781                 self._err_file if quiet else self._screen_file)
 782
 783     def to_stderr(self, message, only_once=False):
 784         """Print message to stderr"""
 785         assert isinstance(message, compat_str)
 786         if self.params.get('logger'):
 787             self.params['logger'].error(message)
 788         else:
 789             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 790
 791     def to_console_title(self, message):
 792         if not self.params.get('consoletitle', False):
 793             return
 794         message = remove_terminal_sequences(message)
 795         if compat_os_name == 'nt':
 796             if ctypes.windll.kernel32.GetConsoleWindow():
 797                 # c_wchar_p() might not be necessary if `message` is
 798                 # already of type unicode()
 799                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 800         elif 'TERM' in os.environ:
 801             self._write_string('\033]0;%s\007' % message, self._screen_file)
 802
 803     def save_console_title(self):
 804         if not self.params.get('consoletitle', False):
 805             return
 806         if self.params.get('simulate'):
 807             return
 808         if compat_os_name != 'nt' and 'TERM' in os.environ:
 809             # Save the title on stack
 810             self._write_string('\033[22;0t', self._screen_file)
 811
 812     def restore_console_title(self):
 813         if not self.params.get('consoletitle', False):
 814             return
 815         if self.params.get('simulate'):
 816             return
 817         if compat_os_name != 'nt' and 'TERM' in os.environ:
 818             # Restore the title from stack
 819             self._write_string('\033[23;0t', self._screen_file)
 820
 821     def __enter__(self):
 822         self.save_console_title()
 823         return self
 824
 825     def __exit__(self, *args):
 826         self.restore_console_title()
 827
 828         if self.params.get('cookiefile') is not None:
 829             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 830
 831     def trouble(self, message=None, tb=None, is_error=True):
 832         """Determine action to take when a download problem appears.
 833
 834         Depending on if the downloader has been configured to ignore
 835         download errors or not, this method may throw an exception or
 836         not when errors are found, after printing the message.
 837
 838         @param tb          If given, is additional traceback information
 839         @param is_error    Whether to raise error according to ignorerrors
 840         """
 841         if message is not None:
 842             self.to_stderr(message)
 843         if self.params.get('verbose'):
 844             if tb is None:
 845                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 846                     tb = ''
 847                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 848                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 849                     tb += encode_compat_str(traceback.format_exc())
 850                 else:
 851                     tb_data = traceback.format_list(traceback.extract_stack())
 852                     tb = ''.join(tb_data)
 853             if tb:
 854                 self.to_stderr(tb)
 855         if not is_error:
 856             return
 857         if not self.params.get('ignoreerrors'):
 858             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 859                 exc_info = sys.exc_info()[1].exc_info
 860             else:
 861                 exc_info = sys.exc_info()
 862             raise DownloadError(message, exc_info)
 863         self._download_retcode = 1
 864
 865     def to_screen(self, message, skip_eol=False):
 866         """Print message to stdout if not in quiet mode"""
 867         self.to_stdout(
 868             message, skip_eol, quiet=self.params.get('quiet', False))
 869
 870     class Styles(Enum):
 871         HEADERS = 'yellow'
 872         EMPHASIS = 'light blue'
 873         ID = 'green'
 874         DELIM = 'blue'
 875         ERROR = 'red'
 876         WARNING = 'yellow'
 877         SUPPRESS = 'light black'
 878
 879     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 880         if test_encoding:
 881             original_text = text
 882             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 883             text = text.encode(encoding, 'ignore').decode(encoding)
 884             if fallback is not None and text != original_text:
 885                 text = fallback
 886         if isinstance(f, self.Styles):
 887             f = f.value
 888         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 889
 890     def _format_screen(self, *args, **kwargs):
 891         return self._format_text(
 892             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 893
 894     def _format_err(self, *args, **kwargs):
 895         return self._format_text(
 896             self._err_file, self._allow_colors['err'], *args, **kwargs)
 897
 898     def report_warning(self, message, only_once=False):
 899         '''
 900         Print the message to stderr, it will be prefixed with 'WARNING:'
 901         If stderr is a tty file the 'WARNING:' will be colored
 902         '''
 903         if self.params.get('logger') is not None:
 904             self.params['logger'].warning(message)
 905         else:
 906             if self.params.get('no_warnings'):
 907                 return
 908             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 909
 910     def deprecation_warning(self, message):
 911         if self.params.get('logger') is not None:
 912             self.params['logger'].warning('DeprecationWarning: {message}')
 913         else:
 914             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 915
 916     def report_error(self, message, *args, **kwargs):
 917         '''
 918         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 919         in red if stderr is a tty file.
 920         '''
 921         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 922
 923     def write_debug(self, message, only_once=False):
 924         '''Log debug message or Print message to stderr'''
 925         if not self.params.get('verbose', False):
 926             return
 927         message = '[debug] %s' % message
 928         if self.params.get('logger'):
 929             self.params['logger'].debug(message)
 930         else:
 931             self.to_stderr(message, only_once)
 932
 933     def report_file_already_downloaded(self, file_name):
 934         """Report file has already been fully downloaded."""
 935         try:
 936             self.to_screen('[download] %s has already been downloaded' % file_name)
 937         except UnicodeEncodeError:
 938             self.to_screen('[download] The file has already been downloaded')
 939
 940     def report_file_delete(self, file_name):
 941         """Report that existing file will be deleted."""
 942         try:
 943             self.to_screen('Deleting existing file %s' % file_name)
 944         except UnicodeEncodeError:
 945             self.to_screen('Deleting existing file')
 946
 947     def raise_no_formats(self, info, forced=False):
 948         has_drm = info.get('__has_drm')
 949         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 950         expected = self.params.get('ignore_no_formats_error')
 951         if forced or not expected:
 952             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 953                                  expected=has_drm or expected)
 954         else:
 955             self.report_warning(msg)
 956
 957     def parse_outtmpl(self):
 958         outtmpl_dict = self.params.get('outtmpl', {})
 959         if not isinstance(outtmpl_dict, dict):
 960             outtmpl_dict = {'default': outtmpl_dict}
 961         # Remove spaces in the default template
 962         if self.params.get('restrictfilenames'):
 963             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 964         else:
 965             sanitize = lambda x: x
 966         outtmpl_dict.update({
 967             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 968             if outtmpl_dict.get(k) is None})
 969         for key, val in outtmpl_dict.items():
 970             if isinstance(val, bytes):
 971                 self.report_warning(
 972                     'Parameter outtmpl is bytes, but should be a unicode string. '
 973                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 974         return outtmpl_dict
 975
 976     def get_output_path(self, dir_type='', filename=None):
 977         paths = self.params.get('paths', {})
 978         assert isinstance(paths, dict)
 979         path = os.path.join(
 980             expand_path(paths.get('home', '').strip()),
 981             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 982             filename or '')
 983
 984         # Temporary fix for #4787
 985         # 'Treat' all problem characters by passing filename through preferredencoding
 986         # to workaround encoding issues with subprocess on python2 @ Windows
 987         if sys.version_info < (3, 0) and sys.platform == 'win32':
 988             path = encodeFilename(path, True).decode(preferredencoding())
 989         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 990
 991     @staticmethod
 992     def _outtmpl_expandpath(outtmpl):
 993         # expand_path translates '%%' into '%' and '$$' into '$'
 994         # correspondingly that is not what we want since we need to keep
 995         # '%%' intact for template dict substitution step. Working around
 996         # with boundary-alike separator hack.
 997         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 998         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 999
1000         # outtmpl should be expand_path'ed before template dict substitution
1001         # because meta fields may contain env variables we don't want to
1002         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1003         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1004         return expand_path(outtmpl).replace(sep, '')
1005
1006     @staticmethod
1007     def escape_outtmpl(outtmpl):
1008         ''' Escape any remaining strings like %s, %abc% etc. '''
1009         return re.sub(
1010             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1011             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1012             outtmpl)
1013
1014     @classmethod
1015     def validate_outtmpl(cls, outtmpl):
1016         ''' @return None or Exception object '''
1017         outtmpl = re.sub(
1018             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1019             lambda mobj: f'{mobj.group(0)[:-1]}s',
1020             cls._outtmpl_expandpath(outtmpl))
1021         try:
1022             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1023             return None
1024         except ValueError as err:
1025             return err
1026
1027     @staticmethod
1028     def _copy_infodict(info_dict):
1029         info_dict = dict(info_dict)
1030         for key in ('__original_infodict', '__postprocessors'):
1031             info_dict.pop(key, None)
1032         return info_dict
1033
1034     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1035         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1036         @param sanitize    Whether to sanitize the output as a filename.
1037                            For backward compatibility, a function can also be passed
1038         """
1039
1040         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1041
1042         info_dict = self._copy_infodict(info_dict)
1043         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1044             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1045             if info_dict.get('duration', None) is not None
1046             else None)
1047         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
1048         info_dict['video_autonumber'] = self._num_videos
1049         if info_dict.get('resolution') is None:
1050             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1051
1052         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1053         # of %(field)s to %(field)0Nd for backward compatibility
1054         field_size_compat_map = {
1055             'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1056             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1057             'autonumber': self.params.get('autonumber_size') or 5,
1058         }
1059
1060         TMPL_DICT = {}
1061         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
1062         MATH_FUNCTIONS = {
1063             '+': float.__add__,
1064             '-': float.__sub__,
1065         }
1066         # Field is of the form key1.key2...
1067         # where keys (except first) can be string, int or slice
1068         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
1069         MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
1070         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1071         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1072             (?P<negate>-)?
1073             (?P<fields>{field})
1074             (?P<maths>(?:{math_op}{math_field})*)
1075             (?:>(?P<strf_format>.+?))?
1076             (?P<alternate>(?<!\\),[^|&)]+)?
1077             (?:&(?P<replacement>.*?))?
1078             (?:\|(?P<default>.*?))?
1079             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
1080
1081         def _traverse_infodict(k):
1082             k = k.split('.')
1083             if k[0] == '':
1084                 k.pop(0)
1085             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
1086
1087         def get_value(mdict):
1088             # Object traversal
1089             value = _traverse_infodict(mdict['fields'])
1090             # Negative
1091             if mdict['negate']:
1092                 value = float_or_none(value)
1093                 if value is not None:
1094                     value *= -1
1095             # Do maths
1096             offset_key = mdict['maths']
1097             if offset_key:
1098                 value = float_or_none(value)
1099                 operator = None
1100                 while offset_key:
1101                     item = re.match(
1102                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1103                         offset_key).group(0)
1104                     offset_key = offset_key[len(item):]
1105                     if operator is None:
1106                         operator = MATH_FUNCTIONS[item]
1107                         continue
1108                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1109                     offset = float_or_none(item)
1110                     if offset is None:
1111                         offset = float_or_none(_traverse_infodict(item))
1112                     try:
1113                         value = operator(value, multiplier * offset)
1114                     except (TypeError, ZeroDivisionError):
1115                         return None
1116                     operator = None
1117             # Datetime formatting
1118             if mdict['strf_format']:
1119                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1120
1121             return value
1122
1123         na = self.params.get('outtmpl_na_placeholder', 'NA')
1124
1125         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1126             return sanitize_filename(str(value), restricted=restricted,
1127                                      is_id=re.search(r'(^|[_.])id(\.|$)', key))
1128
1129         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1130         sanitize = bool(sanitize)
1131
1132         def _dumpjson_default(obj):
1133             if isinstance(obj, (set, LazyList)):
1134                 return list(obj)
1135             return repr(obj)
1136
1137         def create_key(outer_mobj):
1138             if not outer_mobj.group('has_key'):
1139                 return outer_mobj.group(0)
1140             key = outer_mobj.group('key')
1141             mobj = re.match(INTERNAL_FORMAT_RE, key)
1142             initial_field = mobj.group('fields') if mobj else ''
1143             value, replacement, default = None, None, na
1144             while mobj:
1145                 mobj = mobj.groupdict()
1146                 default = mobj['default'] if mobj['default'] is not None else default
1147                 value = get_value(mobj)
1148                 replacement = mobj['replacement']
1149                 if value is None and mobj['alternate']:
1150                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1151                 else:
1152                     break
1153
1154             fmt = outer_mobj.group('format')
1155             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1156                 fmt = '0{:d}d'.format(field_size_compat_map[key])
1157
1158             value = default if value is None else value if replacement is None else replacement
1159
1160             flags = outer_mobj.group('conversion') or ''
1161             str_fmt = f'{fmt[:-1]}s'
1162             if fmt[-1] == 'l':  # list
1163                 delim = '\n' if '#' in flags else ', '
1164                 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1165             elif fmt[-1] == 'j':  # json
1166                 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
1167             elif fmt[-1] == 'q':  # quoted
1168                 value = map(str, variadic(value) if '#' in flags else [value])
1169                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1170             elif fmt[-1] == 'B':  # bytes
1171                 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1172                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1173             elif fmt[-1] == 'U':  # unicode normalized
1174                 value, fmt = unicodedata.normalize(
1175                     # "+" = compatibility equivalence, "#" = NFD
1176                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1177                     value), str_fmt
1178             elif fmt[-1] == 'D':  # decimal suffix
1179                 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1180                 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1181                                               factor=1024 if '#' in flags else 1000)
1182             elif fmt[-1] == 'S':  # filename sanitization
1183                 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
1184             elif fmt[-1] == 'c':
1185                 if value:
1186                     value = str(value)[0]
1187                 else:
1188                     fmt = str_fmt
1189             elif fmt[-1] not in 'rs':  # numeric
1190                 value = float_or_none(value)
1191                 if value is None:
1192                     value, fmt = default, 's'
1193
1194             if sanitize:
1195                 if fmt[-1] == 'r':
1196                     # If value is an object, sanitize might convert it to a string
1197                     # So we convert it to repr first
1198                     value, fmt = repr(value), str_fmt
1199                 if fmt[-1] in 'csr':
1200                     value = sanitizer(initial_field, value)
1201
1202             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1203             TMPL_DICT[key] = value
1204             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1205
1206         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1207
1208     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1209         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1210         return self.escape_outtmpl(outtmpl) % info_dict
1211
1212     def _prepare_filename(self, info_dict, tmpl_type='default'):
1213         try:
1214             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1215             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1216
1217             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1218             if filename and force_ext is not None:
1219                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1220
1221             # https://github.com/blackjack4494/youtube-dlc/issues/85
1222             trim_file_name = self.params.get('trim_file_name', False)
1223             if trim_file_name:
1224                 no_ext, *ext = filename.rsplit('.', 2)
1225                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1226
1227             return filename
1228         except ValueError as err:
1229             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1230             return None
1231
1232     def prepare_filename(self, info_dict, dir_type='', warn=False):
1233         """Generate the output filename."""
1234
1235         filename = self._prepare_filename(info_dict, dir_type or 'default')
1236         if not filename and dir_type not in ('', 'temp'):
1237             return ''
1238
1239         if warn:
1240             if not self.params.get('paths'):
1241                 pass
1242             elif filename == '-':
1243                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1244             elif os.path.isabs(filename):
1245                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1246         if filename == '-' or not filename:
1247             return filename
1248
1249         return self.get_output_path(dir_type, filename)
1250
1251     def _match_entry(self, info_dict, incomplete=False, silent=False):
1252         """ Returns None if the file should be downloaded """
1253
1254         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1255
1256         def check_filter():
1257             if 'title' in info_dict:
1258                 # This can happen when we're just evaluating the playlist
1259                 title = info_dict['title']
1260                 matchtitle = self.params.get('matchtitle', False)
1261                 if matchtitle:
1262                     if not re.search(matchtitle, title, re.IGNORECASE):
1263                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1264                 rejecttitle = self.params.get('rejecttitle', False)
1265                 if rejecttitle:
1266                     if re.search(rejecttitle, title, re.IGNORECASE):
1267                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1268             date = info_dict.get('upload_date')
1269             if date is not None:
1270                 dateRange = self.params.get('daterange', DateRange())
1271                 if date not in dateRange:
1272                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1273             view_count = info_dict.get('view_count')
1274             if view_count is not None:
1275                 min_views = self.params.get('min_views')
1276                 if min_views is not None and view_count < min_views:
1277                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1278                 max_views = self.params.get('max_views')
1279                 if max_views is not None and view_count > max_views:
1280                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1281             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1282                 return 'Skipping "%s" because it is age restricted' % video_title
1283
1284             match_filter = self.params.get('match_filter')
1285             if match_filter is not None:
1286                 try:
1287                     ret = match_filter(info_dict, incomplete=incomplete)
1288                 except TypeError:
1289                     # For backward compatibility
1290                     ret = None if incomplete else match_filter(info_dict)
1291                 if ret is not None:
1292                     return ret
1293             return None
1294
1295         if self.in_download_archive(info_dict):
1296             reason = '%s has already been recorded in the archive' % video_title
1297             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1298         else:
1299             reason = check_filter()
1300             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1301         if reason is not None:
1302             if not silent:
1303                 self.to_screen('[download] ' + reason)
1304             if self.params.get(break_opt, False):
1305                 raise break_err()
1306         return reason
1307
1308     @staticmethod
1309     def add_extra_info(info_dict, extra_info):
1310         '''Set the keys from extra_info in info dict if they are missing'''
1311         for key, value in extra_info.items():
1312             info_dict.setdefault(key, value)
1313
1314     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1315                      process=True, force_generic_extractor=False):
1316         """
1317         Return a list with a dictionary for each video extracted.
1318
1319         Arguments:
1320         url -- URL to extract
1321
1322         Keyword arguments:
1323         download -- whether to download videos during extraction
1324         ie_key -- extractor key hint
1325         extra_info -- dictionary containing the extra values to add to each result
1326         process -- whether to resolve all unresolved references (URLs, playlist items),
1327             must be True for download to work.
1328         force_generic_extractor -- force using the generic extractor
1329         """
1330
1331         if extra_info is None:
1332             extra_info = {}
1333
1334         if not ie_key and force_generic_extractor:
1335             ie_key = 'Generic'
1336
1337         if ie_key:
1338             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1339         else:
1340             ies = self._ies
1341
1342         for ie_key, ie in ies.items():
1343             if not ie.suitable(url):
1344                 continue
1345
1346             if not ie.working():
1347                 self.report_warning('The program functionality for this site has been marked as broken, '
1348                                     'and will probably not work.')
1349
1350             temp_id = ie.get_temp_id(url)
1351             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1352                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1353                 if self.params.get('break_on_existing', False):
1354                     raise ExistingVideoReached()
1355                 break
1356             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1357         else:
1358             self.report_error('no suitable InfoExtractor for URL %s' % url)
1359
1360     def __handle_extraction_exceptions(func):
1361         @functools.wraps(func)
1362         def wrapper(self, *args, **kwargs):
1363             while True:
1364                 try:
1365                     return func(self, *args, **kwargs)
1366                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1367                     raise
1368                 except ReExtractInfo as e:
1369                     if e.expected:
1370                         self.to_screen(f'{e}; Re-extracting data')
1371                     else:
1372                         self.to_stderr('\r')
1373                         self.report_warning(f'{e}; Re-extracting data')
1374                     continue
1375                 except GeoRestrictedError as e:
1376                     msg = e.msg
1377                     if e.countries:
1378                         msg += '\nThis video is available in %s.' % ', '.join(
1379                             map(ISO3166Utils.short2full, e.countries))
1380                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1381                     self.report_error(msg)
1382                 except ExtractorError as e:  # An error we somewhat expected
1383                     self.report_error(str(e), e.format_traceback())
1384                 except Exception as e:
1385                     if self.params.get('ignoreerrors'):
1386                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1387                     else:
1388                         raise
1389                 break
1390         return wrapper
1391
1392     def _wait_for_video(self, ie_result):
1393         if (not self.params.get('wait_for_video')
1394                 or ie_result.get('_type', 'video') != 'video'
1395                 or ie_result.get('formats') or ie_result.get('url')):
1396             return
1397
1398         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1399         last_msg = ''
1400
1401         def progress(msg):
1402             nonlocal last_msg
1403             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1404             last_msg = msg
1405
1406         min_wait, max_wait = self.params.get('wait_for_video')
1407         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1408         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1409             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1410             self.report_warning('Release time of video is not known')
1411         elif (diff or 0) <= 0:
1412             self.report_warning('Video should already be available according to extracted info')
1413         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1414         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1415
1416         wait_till = time.time() + diff
1417         try:
1418             while True:
1419                 diff = wait_till - time.time()
1420                 if diff <= 0:
1421                     progress('')
1422                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1423                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1424                 time.sleep(1)
1425         except KeyboardInterrupt:
1426             progress('')
1427             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1428         except BaseException as e:
1429             if not isinstance(e, ReExtractInfo):
1430                 self.to_screen('')
1431             raise
1432
1433     @__handle_extraction_exceptions
1434     def __extract_info(self, url, ie, download, extra_info, process):
1435         ie_result = ie.extract(url)
1436         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1437             return
1438         if isinstance(ie_result, list):
1439             # Backwards compatibility: old IE result format
1440             ie_result = {
1441                 '_type': 'compat_list',
1442                 'entries': ie_result,
1443             }
1444         if extra_info.get('original_url'):
1445             ie_result.setdefault('original_url', extra_info['original_url'])
1446         self.add_default_extra_info(ie_result, ie, url)
1447         if process:
1448             self._wait_for_video(ie_result)
1449             return self.process_ie_result(ie_result, download, extra_info)
1450         else:
1451             return ie_result
1452
1453     def add_default_extra_info(self, ie_result, ie, url):
1454         if url is not None:
1455             self.add_extra_info(ie_result, {
1456                 'webpage_url': url,
1457                 'original_url': url,
1458                 'webpage_url_basename': url_basename(url),
1459                 'webpage_url_domain': get_domain(url),
1460             })
1461         if ie is not None:
1462             self.add_extra_info(ie_result, {
1463                 'extractor': ie.IE_NAME,
1464                 'extractor_key': ie.ie_key(),
1465             })
1466
1467     def process_ie_result(self, ie_result, download=True, extra_info=None):
1468         """
1469         Take the result of the ie(may be modified) and resolve all unresolved
1470         references (URLs, playlist items).
1471
1472         It will also download the videos if 'download'.
1473         Returns the resolved ie_result.
1474         """
1475         if extra_info is None:
1476             extra_info = {}
1477         result_type = ie_result.get('_type', 'video')
1478
1479         if result_type in ('url', 'url_transparent'):
1480             ie_result['url'] = sanitize_url(ie_result['url'])
1481             if ie_result.get('original_url'):
1482                 extra_info.setdefault('original_url', ie_result['original_url'])
1483
1484             extract_flat = self.params.get('extract_flat', False)
1485             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1486                     or extract_flat is True):
1487                 info_copy = ie_result.copy()
1488                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1489                 if ie and not ie_result.get('id'):
1490                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1491                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1492                 self.add_extra_info(info_copy, extra_info)
1493                 info_copy, _ = self.pre_process(info_copy)
1494                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1495                 if self.params.get('force_write_download_archive', False):
1496                     self.record_download_archive(info_copy)
1497                 return ie_result
1498
1499         if result_type == 'video':
1500             self.add_extra_info(ie_result, extra_info)
1501             ie_result = self.process_video_result(ie_result, download=download)
1502             additional_urls = (ie_result or {}).get('additional_urls')
1503             if additional_urls:
1504                 # TODO: Improve MetadataParserPP to allow setting a list
1505                 if isinstance(additional_urls, compat_str):
1506                     additional_urls = [additional_urls]
1507                 self.to_screen(
1508                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1509                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1510                 ie_result['additional_entries'] = [
1511                     self.extract_info(
1512                         url, download, extra_info=extra_info,
1513                         force_generic_extractor=self.params.get('force_generic_extractor'))
1514                     for url in additional_urls
1515                 ]
1516             return ie_result
1517         elif result_type == 'url':
1518             # We have to add extra_info to the results because it may be
1519             # contained in a playlist
1520             return self.extract_info(
1521                 ie_result['url'], download,
1522                 ie_key=ie_result.get('ie_key'),
1523                 extra_info=extra_info)
1524         elif result_type == 'url_transparent':
1525             # Use the information from the embedding page
1526             info = self.extract_info(
1527                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1528                 extra_info=extra_info, download=False, process=False)
1529
1530             # extract_info may return None when ignoreerrors is enabled and
1531             # extraction failed with an error, don't crash and return early
1532             # in this case
1533             if not info:
1534                 return info
1535
1536             force_properties = dict(
1537                 (k, v) for k, v in ie_result.items() if v is not None)
1538             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1539                 if f in force_properties:
1540                     del force_properties[f]
1541             new_result = info.copy()
1542             new_result.update(force_properties)
1543
1544             # Extracted info may not be a video result (i.e.
1545             # info.get('_type', 'video') != video) but rather an url or
1546             # url_transparent. In such cases outer metadata (from ie_result)
1547             # should be propagated to inner one (info). For this to happen
1548             # _type of info should be overridden with url_transparent. This
1549             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1550             if new_result.get('_type') == 'url':
1551                 new_result['_type'] = 'url_transparent'
1552
1553             return self.process_ie_result(
1554                 new_result, download=download, extra_info=extra_info)
1555         elif result_type in ('playlist', 'multi_video'):
1556             # Protect from infinite recursion due to recursively nested playlists
1557             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1558             webpage_url = ie_result['webpage_url']
1559             if webpage_url in self._playlist_urls:
1560                 self.to_screen(
1561                     '[download] Skipping already downloaded playlist: %s'
1562                     % ie_result.get('title') or ie_result.get('id'))
1563                 return
1564
1565             self._playlist_level += 1
1566             self._playlist_urls.add(webpage_url)
1567             self._sanitize_thumbnails(ie_result)
1568             try:
1569                 return self.__process_playlist(ie_result, download)
1570             finally:
1571                 self._playlist_level -= 1
1572                 if not self._playlist_level:
1573                     self._playlist_urls.clear()
1574         elif result_type == 'compat_list':
1575             self.report_warning(
1576                 'Extractor %s returned a compat_list result. '
1577                 'It needs to be updated.' % ie_result.get('extractor'))
1578
1579             def _fixup(r):
1580                 self.add_extra_info(r, {
1581                     'extractor': ie_result['extractor'],
1582                     'webpage_url': ie_result['webpage_url'],
1583                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1584                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1585                     'extractor_key': ie_result['extractor_key'],
1586                 })
1587                 return r
1588             ie_result['entries'] = [
1589                 self.process_ie_result(_fixup(r), download, extra_info)
1590                 for r in ie_result['entries']
1591             ]
1592             return ie_result
1593         else:
1594             raise Exception('Invalid result type: %s' % result_type)
1595
1596     def _ensure_dir_exists(self, path):
1597         return make_dir(path, self.report_error)
1598
1599     def __process_playlist(self, ie_result, download):
1600         # We process each entry in the playlist
1601         playlist = ie_result.get('title') or ie_result.get('id')
1602         self.to_screen('[download] Downloading playlist: %s' % playlist)
1603
1604         if 'entries' not in ie_result:
1605             raise EntryNotInPlaylist('There are no entries')
1606
1607         MissingEntry = object()
1608         incomplete_entries = bool(ie_result.get('requested_entries'))
1609         if incomplete_entries:
1610             def fill_missing_entries(entries, indices):
1611                 ret = [MissingEntry] * max(indices)
1612                 for i, entry in zip(indices, entries):
1613                     ret[i - 1] = entry
1614                 return ret
1615             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1616
1617         playlist_results = []
1618
1619         playliststart = self.params.get('playliststart', 1)
1620         playlistend = self.params.get('playlistend')
1621         # For backwards compatibility, interpret -1 as whole list
1622         if playlistend == -1:
1623             playlistend = None
1624
1625         playlistitems_str = self.params.get('playlist_items')
1626         playlistitems = None
1627         if playlistitems_str is not None:
1628             def iter_playlistitems(format):
1629                 for string_segment in format.split(','):
1630                     if '-' in string_segment:
1631                         start, end = string_segment.split('-')
1632                         for item in range(int(start), int(end) + 1):
1633                             yield int(item)
1634                     else:
1635                         yield int(string_segment)
1636             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1637
1638         ie_entries = ie_result['entries']
1639         if isinstance(ie_entries, list):
1640             playlist_count = len(ie_entries)
1641             msg = f'Collected {playlist_count} videos; downloading %d of them'
1642             ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
1643
1644             def get_entry(i):
1645                 return ie_entries[i - 1]
1646         else:
1647             msg = 'Downloading %d videos'
1648             if not isinstance(ie_entries, (PagedList, LazyList)):
1649                 ie_entries = LazyList(ie_entries)
1650
1651             def get_entry(i):
1652                 return YoutubeDL.__handle_extraction_exceptions(
1653                     lambda self, i: ie_entries[i - 1]
1654                 )(self, i)
1655
1656         entries, broken = [], False
1657         items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1658         for i in items:
1659             if i == 0:
1660                 continue
1661             if playlistitems is None and playlistend is not None and playlistend < i:
1662                 break
1663             entry = None
1664             try:
1665                 entry = get_entry(i)
1666                 if entry is MissingEntry:
1667                     raise EntryNotInPlaylist()
1668             except (IndexError, EntryNotInPlaylist):
1669                 if incomplete_entries:
1670                     raise EntryNotInPlaylist(f'Entry {i} cannot be found')
1671                 elif not playlistitems:
1672                     break
1673             entries.append(entry)
1674             try:
1675                 if entry is not None:
1676                     self._match_entry(entry, incomplete=True, silent=True)
1677             except (ExistingVideoReached, RejectedVideoReached):
1678                 broken = True
1679                 break
1680         ie_result['entries'] = entries
1681
1682         # Save playlist_index before re-ordering
1683         entries = [
1684             ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1685             for i, entry in enumerate(entries, 1)
1686             if entry is not None]
1687         n_entries = len(entries)
1688
1689         if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
1690             ie_result['playlist_count'] = n_entries
1691
1692         if not playlistitems and (playliststart != 1 or playlistend):
1693             playlistitems = list(range(playliststart, playliststart + n_entries))
1694         ie_result['requested_entries'] = playlistitems
1695
1696         _infojson_written = False
1697         if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
1698             ie_copy = {
1699                 'playlist': playlist,
1700                 'playlist_id': ie_result.get('id'),
1701                 'playlist_title': ie_result.get('title'),
1702                 'playlist_uploader': ie_result.get('uploader'),
1703                 'playlist_uploader_id': ie_result.get('uploader_id'),
1704                 'playlist_index': 0,
1705                 'n_entries': n_entries,
1706             }
1707             ie_copy.update(dict(ie_result))
1708
1709             _infojson_written = self._write_info_json(
1710                 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1711             if _infojson_written is None:
1712                 return
1713             if self._write_description('playlist', ie_result,
1714                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1715                 return
1716             # TODO: This should be passed to ThumbnailsConvertor if necessary
1717             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1718
1719         if self.params.get('playlistreverse', False):
1720             entries = entries[::-1]
1721         if self.params.get('playlistrandom', False):
1722             random.shuffle(entries)
1723
1724         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1725
1726         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1727         failures = 0
1728         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1729         for i, entry_tuple in enumerate(entries, 1):
1730             playlist_index, entry = entry_tuple
1731             if 'playlist-index' in self.params.get('compat_opts', []):
1732                 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1733             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1734             # This __x_forwarded_for_ip thing is a bit ugly but requires
1735             # minimal changes
1736             if x_forwarded_for:
1737                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1738             extra = {
1739                 'n_entries': n_entries,
1740                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1741                 'playlist_count': ie_result.get('playlist_count'),
1742                 'playlist_index': playlist_index,
1743                 'playlist_autonumber': i,
1744                 'playlist': playlist,
1745                 'playlist_id': ie_result.get('id'),
1746                 'playlist_title': ie_result.get('title'),
1747                 'playlist_uploader': ie_result.get('uploader'),
1748                 'playlist_uploader_id': ie_result.get('uploader_id'),
1749                 'extractor': ie_result['extractor'],
1750                 'webpage_url': ie_result['webpage_url'],
1751                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1752                 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1753                 'extractor_key': ie_result['extractor_key'],
1754             }
1755
1756             if self._match_entry(entry, incomplete=True) is not None:
1757                 continue
1758
1759             entry_result = self.__process_iterable_entry(entry, download, extra)
1760             if not entry_result:
1761                 failures += 1
1762             if failures >= max_failures:
1763                 self.report_error(
1764                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1765                 break
1766             playlist_results.append(entry_result)
1767         ie_result['entries'] = playlist_results
1768
1769         # Write the updated info to json
1770         if _infojson_written and self._write_info_json(
1771                 'updated playlist', ie_result,
1772                 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1773             return
1774
1775         ie_result = self.run_all_pps('playlist', ie_result)
1776         self.to_screen(f'[download] Finished downloading playlist: {playlist}')
1777         return ie_result
1778
1779     @__handle_extraction_exceptions
1780     def __process_iterable_entry(self, entry, download, extra_info):
1781         return self.process_ie_result(
1782             entry, download=download, extra_info=extra_info)
1783
1784     def _build_format_filter(self, filter_spec):
1785         " Returns a function to filter the formats according to the filter_spec "
1786
1787         OPERATORS = {
1788             '<': operator.lt,
1789             '<=': operator.le,
1790             '>': operator.gt,
1791             '>=': operator.ge,
1792             '=': operator.eq,
1793             '!=': operator.ne,
1794         }
1795         operator_rex = re.compile(r'''(?x)\s*
1796             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1797             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1798             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1799             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1800         m = operator_rex.fullmatch(filter_spec)
1801         if m:
1802             try:
1803                 comparison_value = int(m.group('value'))
1804             except ValueError:
1805                 comparison_value = parse_filesize(m.group('value'))
1806                 if comparison_value is None:
1807                     comparison_value = parse_filesize(m.group('value') + 'B')
1808                 if comparison_value is None:
1809                     raise ValueError(
1810                         'Invalid value %r in format specification %r' % (
1811                             m.group('value'), filter_spec))
1812             op = OPERATORS[m.group('op')]
1813
1814         if not m:
1815             STR_OPERATORS = {
1816                 '=': operator.eq,
1817                 '^=': lambda attr, value: attr.startswith(value),
1818                 '$=': lambda attr, value: attr.endswith(value),
1819                 '*=': lambda attr, value: value in attr,
1820             }
1821             str_operator_rex = re.compile(r'''(?x)\s*
1822                 (?P<key>[a-zA-Z0-9._-]+)\s*
1823                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1824                 (?P<value>[a-zA-Z0-9._-]+)\s*
1825                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1826             m = str_operator_rex.fullmatch(filter_spec)
1827             if m:
1828                 comparison_value = m.group('value')
1829                 str_op = STR_OPERATORS[m.group('op')]
1830                 if m.group('negation'):
1831                     op = lambda attr, value: not str_op(attr, value)
1832                 else:
1833                     op = str_op
1834
1835         if not m:
1836             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1837
1838         def _filter(f):
1839             actual_value = f.get(m.group('key'))
1840             if actual_value is None:
1841                 return m.group('none_inclusive')
1842             return op(actual_value, comparison_value)
1843         return _filter
1844
1845     def _check_formats(self, formats):
1846         for f in formats:
1847             self.to_screen('[info] Testing format %s' % f['format_id'])
1848             path = self.get_output_path('temp')
1849             if not self._ensure_dir_exists(f'{path}/'):
1850                 continue
1851             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1852             temp_file.close()
1853             try:
1854                 success, _ = self.dl(temp_file.name, f, test=True)
1855             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1856                 success = False
1857             finally:
1858                 if os.path.exists(temp_file.name):
1859                     try:
1860                         os.remove(temp_file.name)
1861                     except OSError:
1862                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1863             if success:
1864                 yield f
1865             else:
1866                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1867
1868     def _default_format_spec(self, info_dict, download=True):
1869
1870         def can_merge():
1871             merger = FFmpegMergerPP(self)
1872             return merger.available and merger.can_merge()
1873
1874         prefer_best = (
1875             not self.params.get('simulate')
1876             and download
1877             and (
1878                 not can_merge()
1879                 or info_dict.get('is_live', False)
1880                 or self.outtmpl_dict['default'] == '-'))
1881         compat = (
1882             prefer_best
1883             or self.params.get('allow_multiple_audio_streams', False)
1884             or 'format-spec' in self.params.get('compat_opts', []))
1885
1886         return (
1887             'best/bestvideo+bestaudio' if prefer_best
1888             else 'bestvideo*+bestaudio/best' if not compat
1889             else 'bestvideo+bestaudio/best')
1890
1891     def build_format_selector(self, format_spec):
1892         def syntax_error(note, start):
1893             message = (
1894                 'Invalid format specification: '
1895                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1896             return SyntaxError(message)
1897
1898         PICKFIRST = 'PICKFIRST'
1899         MERGE = 'MERGE'
1900         SINGLE = 'SINGLE'
1901         GROUP = 'GROUP'
1902         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1903
1904         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1905                                   'video': self.params.get('allow_multiple_video_streams', False)}
1906
1907         check_formats = self.params.get('check_formats') == 'selected'
1908
1909         def _parse_filter(tokens):
1910             filter_parts = []
1911             for type, string, start, _, _ in tokens:
1912                 if type == tokenize.OP and string == ']':
1913                     return ''.join(filter_parts)
1914                 else:
1915                     filter_parts.append(string)
1916
1917         def _remove_unused_ops(tokens):
1918             # Remove operators that we don't use and join them with the surrounding strings
1919             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1920             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1921             last_string, last_start, last_end, last_line = None, None, None, None
1922             for type, string, start, end, line in tokens:
1923                 if type == tokenize.OP and string == '[':
1924                     if last_string:
1925                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1926                         last_string = None
1927                     yield type, string, start, end, line
1928                     # everything inside brackets will be handled by _parse_filter
1929                     for type, string, start, end, line in tokens:
1930                         yield type, string, start, end, line
1931                         if type == tokenize.OP and string == ']':
1932                             break
1933                 elif type == tokenize.OP and string in ALLOWED_OPS:
1934                     if last_string:
1935                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1936                         last_string = None
1937                     yield type, string, start, end, line
1938                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1939                     if not last_string:
1940                         last_string = string
1941                         last_start = start
1942                         last_end = end
1943                     else:
1944                         last_string += string
1945             if last_string:
1946                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1947
1948         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1949             selectors = []
1950             current_selector = None
1951             for type, string, start, _, _ in tokens:
1952                 # ENCODING is only defined in python 3.x
1953                 if type == getattr(tokenize, 'ENCODING', None):
1954                     continue
1955                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1956                     current_selector = FormatSelector(SINGLE, string, [])
1957                 elif type == tokenize.OP:
1958                     if string == ')':
1959                         if not inside_group:
1960                             # ')' will be handled by the parentheses group
1961                             tokens.restore_last_token()
1962                         break
1963                     elif inside_merge and string in ['/', ',']:
1964                         tokens.restore_last_token()
1965                         break
1966                     elif inside_choice and string == ',':
1967                         tokens.restore_last_token()
1968                         break
1969                     elif string == ',':
1970                         if not current_selector:
1971                             raise syntax_error('"," must follow a format selector', start)
1972                         selectors.append(current_selector)
1973                         current_selector = None
1974                     elif string == '/':
1975                         if not current_selector:
1976                             raise syntax_error('"/" must follow a format selector', start)
1977                         first_choice = current_selector
1978                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1979                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1980                     elif string == '[':
1981                         if not current_selector:
1982                             current_selector = FormatSelector(SINGLE, 'best', [])
1983                         format_filter = _parse_filter(tokens)
1984                         current_selector.filters.append(format_filter)
1985                     elif string == '(':
1986                         if current_selector:
1987                             raise syntax_error('Unexpected "("', start)
1988                         group = _parse_format_selection(tokens, inside_group=True)
1989                         current_selector = FormatSelector(GROUP, group, [])
1990                     elif string == '+':
1991                         if not current_selector:
1992                             raise syntax_error('Unexpected "+"', start)
1993                         selector_1 = current_selector
1994                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1995                         if not selector_2:
1996                             raise syntax_error('Expected a selector', start)
1997                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1998                     else:
1999                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2000                 elif type == tokenize.ENDMARKER:
2001                     break
2002             if current_selector:
2003                 selectors.append(current_selector)
2004             return selectors
2005
2006         def _merge(formats_pair):
2007             format_1, format_2 = formats_pair
2008
2009             formats_info = []
2010             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2011             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2012
2013             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2014                 get_no_more = {'video': False, 'audio': False}
2015                 for (i, fmt_info) in enumerate(formats_info):
2016                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2017                         formats_info.pop(i)
2018                         continue
2019                     for aud_vid in ['audio', 'video']:
2020                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2021                             if get_no_more[aud_vid]:
2022                                 formats_info.pop(i)
2023                                 break
2024                             get_no_more[aud_vid] = True
2025
2026             if len(formats_info) == 1:
2027                 return formats_info[0]
2028
2029             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2030             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2031
2032             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2033             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2034
2035             output_ext = self.params.get('merge_output_format')
2036             if not output_ext:
2037                 if the_only_video:
2038                     output_ext = the_only_video['ext']
2039                 elif the_only_audio and not video_fmts:
2040                     output_ext = the_only_audio['ext']
2041                 else:
2042                     output_ext = 'mkv'
2043
2044             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2045
2046             new_dict = {
2047                 'requested_formats': formats_info,
2048                 'format': '+'.join(filtered('format')),
2049                 'format_id': '+'.join(filtered('format_id')),
2050                 'ext': output_ext,
2051                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2052                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2053                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2054                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2055                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2056             }
2057
2058             if the_only_video:
2059                 new_dict.update({
2060                     'width': the_only_video.get('width'),
2061                     'height': the_only_video.get('height'),
2062                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2063                     'fps': the_only_video.get('fps'),
2064                     'dynamic_range': the_only_video.get('dynamic_range'),
2065                     'vcodec': the_only_video.get('vcodec'),
2066                     'vbr': the_only_video.get('vbr'),
2067                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2068                 })
2069
2070             if the_only_audio:
2071                 new_dict.update({
2072                     'acodec': the_only_audio.get('acodec'),
2073                     'abr': the_only_audio.get('abr'),
2074                     'asr': the_only_audio.get('asr'),
2075                 })
2076
2077             return new_dict
2078
2079         def _check_formats(formats):
2080             if not check_formats:
2081                 yield from formats
2082                 return
2083             yield from self._check_formats(formats)
2084
2085         def _build_selector_function(selector):
2086             if isinstance(selector, list):  # ,
2087                 fs = [_build_selector_function(s) for s in selector]
2088
2089                 def selector_function(ctx):
2090                     for f in fs:
2091                         yield from f(ctx)
2092                 return selector_function
2093
2094             elif selector.type == GROUP:  # ()
2095                 selector_function = _build_selector_function(selector.selector)
2096
2097             elif selector.type == PICKFIRST:  # /
2098                 fs = [_build_selector_function(s) for s in selector.selector]
2099
2100                 def selector_function(ctx):
2101                     for f in fs:
2102                         picked_formats = list(f(ctx))
2103                         if picked_formats:
2104                             return picked_formats
2105                     return []
2106
2107             elif selector.type == MERGE:  # +
2108                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2109
2110                 def selector_function(ctx):
2111                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2112                         yield _merge(pair)
2113
2114             elif selector.type == SINGLE:  # atom
2115                 format_spec = selector.selector or 'best'
2116
2117                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2118                 if format_spec == 'all':
2119                     def selector_function(ctx):
2120                         yield from _check_formats(ctx['formats'][::-1])
2121                 elif format_spec == 'mergeall':
2122                     def selector_function(ctx):
2123                         formats = list(_check_formats(ctx['formats']))
2124                         if not formats:
2125                             return
2126                         merged_format = formats[-1]
2127                         for f in formats[-2::-1]:
2128                             merged_format = _merge((merged_format, f))
2129                         yield merged_format
2130
2131                 else:
2132                     format_fallback, format_reverse, format_idx = False, True, 1
2133                     mobj = re.match(
2134                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2135                         format_spec)
2136                     if mobj is not None:
2137                         format_idx = int_or_none(mobj.group('n'), default=1)
2138                         format_reverse = mobj.group('bw')[0] == 'b'
2139                         format_type = (mobj.group('type') or [None])[0]
2140                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2141                         format_modified = mobj.group('mod') is not None
2142
2143                         format_fallback = not format_type and not format_modified  # for b, w
2144                         _filter_f = (
2145                             (lambda f: f.get('%scodec' % format_type) != 'none')
2146                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2147                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2148                             if format_type  # bv, ba, wv, wa
2149                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2150                             if not format_modified  # b, w
2151                             else lambda f: True)  # b*, w*
2152                         filter_f = lambda f: _filter_f(f) and (
2153                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2154                     else:
2155                         if format_spec in self._format_selection_exts['audio']:
2156                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2157                         elif format_spec in self._format_selection_exts['video']:
2158                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2159                         elif format_spec in self._format_selection_exts['storyboards']:
2160                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2161                         else:
2162                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2163
2164                     def selector_function(ctx):
2165                         formats = list(ctx['formats'])
2166                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2167                         if format_fallback and ctx['incomplete_formats'] and not matches:
2168                             # for extractors with incomplete formats (audio only (soundcloud)
2169                             # or video only (imgur)) best/worst will fallback to
2170                             # best/worst {video,audio}-only format
2171                             matches = formats
2172                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2173                         try:
2174                             yield matches[format_idx - 1]
2175                         except IndexError:
2176                             return
2177
2178             filters = [self._build_format_filter(f) for f in selector.filters]
2179
2180             def final_selector(ctx):
2181                 ctx_copy = dict(ctx)
2182                 for _filter in filters:
2183                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2184                 return selector_function(ctx_copy)
2185             return final_selector
2186
2187         stream = io.BytesIO(format_spec.encode('utf-8'))
2188         try:
2189             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2190         except tokenize.TokenError:
2191             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2192
2193         class TokenIterator(object):
2194             def __init__(self, tokens):
2195                 self.tokens = tokens
2196                 self.counter = 0
2197
2198             def __iter__(self):
2199                 return self
2200
2201             def __next__(self):
2202                 if self.counter >= len(self.tokens):
2203                     raise StopIteration()
2204                 value = self.tokens[self.counter]
2205                 self.counter += 1
2206                 return value
2207
2208             next = __next__
2209
2210             def restore_last_token(self):
2211                 self.counter -= 1
2212
2213         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2214         return _build_selector_function(parsed_selector)
2215
2216     def _calc_headers(self, info_dict):
2217         res = std_headers.copy()
2218
2219         add_headers = info_dict.get('http_headers')
2220         if add_headers:
2221             res.update(add_headers)
2222
2223         cookies = self._calc_cookies(info_dict)
2224         if cookies:
2225             res['Cookie'] = cookies
2226
2227         if 'X-Forwarded-For' not in res:
2228             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2229             if x_forwarded_for_ip:
2230                 res['X-Forwarded-For'] = x_forwarded_for_ip
2231
2232         return res
2233
2234     def _calc_cookies(self, info_dict):
2235         pr = sanitized_Request(info_dict['url'])
2236         self.cookiejar.add_cookie_header(pr)
2237         return pr.get_header('Cookie')
2238
2239     def _sort_thumbnails(self, thumbnails):
2240         thumbnails.sort(key=lambda t: (
2241             t.get('preference') if t.get('preference') is not None else -1,
2242             t.get('width') if t.get('width') is not None else -1,
2243             t.get('height') if t.get('height') is not None else -1,
2244             t.get('id') if t.get('id') is not None else '',
2245             t.get('url')))
2246
2247     def _sanitize_thumbnails(self, info_dict):
2248         thumbnails = info_dict.get('thumbnails')
2249         if thumbnails is None:
2250             thumbnail = info_dict.get('thumbnail')
2251             if thumbnail:
2252                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2253         if not thumbnails:
2254             return
2255
2256         def check_thumbnails(thumbnails):
2257             for t in thumbnails:
2258                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2259                 try:
2260                     self.urlopen(HEADRequest(t['url']))
2261                 except network_exceptions as err:
2262                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2263                     continue
2264                 yield t
2265
2266         self._sort_thumbnails(thumbnails)
2267         for i, t in enumerate(thumbnails):
2268             if t.get('id') is None:
2269                 t['id'] = '%d' % i
2270             if t.get('width') and t.get('height'):
2271                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2272             t['url'] = sanitize_url(t['url'])
2273
2274         if self.params.get('check_formats') is True:
2275             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2276         else:
2277             info_dict['thumbnails'] = thumbnails
2278
2279     def process_video_result(self, info_dict, download=True):
2280         assert info_dict.get('_type', 'video') == 'video'
2281         self._num_videos += 1
2282
2283         if 'id' not in info_dict:
2284             raise ExtractorError('Missing "id" field in extractor result')
2285         if 'title' not in info_dict:
2286             raise ExtractorError('Missing "title" field in extractor result',
2287                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2288
2289         def report_force_conversion(field, field_not, conversion):
2290             self.report_warning(
2291                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2292                 % (field, field_not, conversion))
2293
2294         def sanitize_string_field(info, string_field):
2295             field = info.get(string_field)
2296             if field is None or isinstance(field, compat_str):
2297                 return
2298             report_force_conversion(string_field, 'a string', 'string')
2299             info[string_field] = compat_str(field)
2300
2301         def sanitize_numeric_fields(info):
2302             for numeric_field in self._NUMERIC_FIELDS:
2303                 field = info.get(numeric_field)
2304                 if field is None or isinstance(field, compat_numeric_types):
2305                     continue
2306                 report_force_conversion(numeric_field, 'numeric', 'int')
2307                 info[numeric_field] = int_or_none(field)
2308
2309         sanitize_string_field(info_dict, 'id')
2310         sanitize_numeric_fields(info_dict)
2311
2312         if 'playlist' not in info_dict:
2313             # It isn't part of a playlist
2314             info_dict['playlist'] = None
2315             info_dict['playlist_index'] = None
2316
2317         self._sanitize_thumbnails(info_dict)
2318
2319         thumbnail = info_dict.get('thumbnail')
2320         thumbnails = info_dict.get('thumbnails')
2321         if thumbnail:
2322             info_dict['thumbnail'] = sanitize_url(thumbnail)
2323         elif thumbnails:
2324             info_dict['thumbnail'] = thumbnails[-1]['url']
2325
2326         if info_dict.get('display_id') is None and 'id' in info_dict:
2327             info_dict['display_id'] = info_dict['id']
2328
2329         if info_dict.get('duration') is not None:
2330             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2331
2332         for ts_key, date_key in (
2333                 ('timestamp', 'upload_date'),
2334                 ('release_timestamp', 'release_date'),
2335                 ('modified_timestamp', 'modified_date'),
2336         ):
2337             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2338                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2339                 # see http://bugs.python.org/issue1646728)
2340                 try:
2341                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2342                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2343                 except (ValueError, OverflowError, OSError):
2344                     pass
2345
2346         live_keys = ('is_live', 'was_live')
2347         live_status = info_dict.get('live_status')
2348         if live_status is None:
2349             for key in live_keys:
2350                 if info_dict.get(key) is False:
2351                     continue
2352                 if info_dict.get(key):
2353                     live_status = key
2354                 break
2355             if all(info_dict.get(key) is False for key in live_keys):
2356                 live_status = 'not_live'
2357         if live_status:
2358             info_dict['live_status'] = live_status
2359             for key in live_keys:
2360                 if info_dict.get(key) is None:
2361                     info_dict[key] = (live_status == key)
2362
2363         # Auto generate title fields corresponding to the *_number fields when missing
2364         # in order to always have clean titles. This is very common for TV series.
2365         for field in ('chapter', 'season', 'episode'):
2366             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2367                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2368
2369         for cc_kind in ('subtitles', 'automatic_captions'):
2370             cc = info_dict.get(cc_kind)
2371             if cc:
2372                 for _, subtitle in cc.items():
2373                     for subtitle_format in subtitle:
2374                         if subtitle_format.get('url'):
2375                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2376                         if subtitle_format.get('ext') is None:
2377                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2378
2379         automatic_captions = info_dict.get('automatic_captions')
2380         subtitles = info_dict.get('subtitles')
2381
2382         info_dict['requested_subtitles'] = self.process_subtitles(
2383             info_dict['id'], subtitles, automatic_captions)
2384
2385         if info_dict.get('formats') is None:
2386             # There's only one format available
2387             formats = [info_dict]
2388         else:
2389             formats = info_dict['formats']
2390
2391         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2392         if not self.params.get('allow_unplayable_formats'):
2393             formats = [f for f in formats if not f.get('has_drm')]
2394
2395         # backward compatibility
2396         info_dict['fulltitle'] = info_dict['title']
2397
2398         if info_dict.get('is_live'):
2399             get_from_start = bool(self.params.get('live_from_start'))
2400             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2401             if not get_from_start:
2402                 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2403
2404         if not formats:
2405             self.raise_no_formats(info_dict)
2406
2407         def is_wellformed(f):
2408             url = f.get('url')
2409             if not url:
2410                 self.report_warning(
2411                     '"url" field is missing or empty - skipping format, '
2412                     'there is an error in extractor')
2413                 return False
2414             if isinstance(url, bytes):
2415                 sanitize_string_field(f, 'url')
2416             return True
2417
2418         # Filter out malformed formats for better extraction robustness
2419         formats = list(filter(is_wellformed, formats))
2420
2421         formats_dict = {}
2422
2423         # We check that all the formats have the format and format_id fields
2424         for i, format in enumerate(formats):
2425             sanitize_string_field(format, 'format_id')
2426             sanitize_numeric_fields(format)
2427             format['url'] = sanitize_url(format['url'])
2428             if not format.get('format_id'):
2429                 format['format_id'] = compat_str(i)
2430             else:
2431                 # Sanitize format_id from characters used in format selector expression
2432                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2433             format_id = format['format_id']
2434             if format_id not in formats_dict:
2435                 formats_dict[format_id] = []
2436             formats_dict[format_id].append(format)
2437
2438         # Make sure all formats have unique format_id
2439         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2440         for format_id, ambiguous_formats in formats_dict.items():
2441             ambigious_id = len(ambiguous_formats) > 1
2442             for i, format in enumerate(ambiguous_formats):
2443                 if ambigious_id:
2444                     format['format_id'] = '%s-%d' % (format_id, i)
2445                 if format.get('ext') is None:
2446                     format['ext'] = determine_ext(format['url']).lower()
2447                 # Ensure there is no conflict between id and ext in format selection
2448                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2449                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2450                     format['format_id'] = 'f%s' % format['format_id']
2451
2452         for i, format in enumerate(formats):
2453             if format.get('format') is None:
2454                 format['format'] = '{id} - {res}{note}'.format(
2455                     id=format['format_id'],
2456                     res=self.format_resolution(format),
2457                     note=format_field(format, 'format_note', ' (%s)'),
2458                 )
2459             if format.get('protocol') is None:
2460                 format['protocol'] = determine_protocol(format)
2461             if format.get('resolution') is None:
2462                 format['resolution'] = self.format_resolution(format, default=None)
2463             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2464                 format['dynamic_range'] = 'SDR'
2465             if (info_dict.get('duration') and format.get('tbr')
2466                     and not format.get('filesize') and not format.get('filesize_approx')):
2467                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2468
2469             # Add HTTP headers, so that external programs can use them from the
2470             # json output
2471             full_format_info = info_dict.copy()
2472             full_format_info.update(format)
2473             format['http_headers'] = self._calc_headers(full_format_info)
2474         # Remove private housekeeping stuff
2475         if '__x_forwarded_for_ip' in info_dict:
2476             del info_dict['__x_forwarded_for_ip']
2477
2478         # TODO Central sorting goes here
2479
2480         if self.params.get('check_formats') is True:
2481             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2482
2483         if not formats or formats[0] is not info_dict:
2484             # only set the 'formats' fields if the original info_dict list them
2485             # otherwise we end up with a circular reference, the first (and unique)
2486             # element in the 'formats' field in info_dict is info_dict itself,
2487             # which can't be exported to json
2488             info_dict['formats'] = formats
2489
2490         info_dict, _ = self.pre_process(info_dict)
2491
2492         # The pre-processors may have modified the formats
2493         formats = info_dict.get('formats', [info_dict])
2494
2495         list_only = self.params.get('simulate') is None and (
2496             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2497         interactive_format_selection = not list_only and self.format_selector == '-'
2498         if self.params.get('list_thumbnails'):
2499             self.list_thumbnails(info_dict)
2500         if self.params.get('listsubtitles'):
2501             if 'automatic_captions' in info_dict:
2502                 self.list_subtitles(
2503                     info_dict['id'], automatic_captions, 'automatic captions')
2504             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2505         if self.params.get('listformats') or interactive_format_selection:
2506             self.list_formats(info_dict)
2507         if list_only:
2508             # Without this printing, -F --print-json will not work
2509             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2510             return
2511
2512         format_selector = self.format_selector
2513         if format_selector is None:
2514             req_format = self._default_format_spec(info_dict, download=download)
2515             self.write_debug('Default format spec: %s' % req_format)
2516             format_selector = self.build_format_selector(req_format)
2517
2518         while True:
2519             if interactive_format_selection:
2520                 req_format = input(
2521                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2522                 try:
2523                     format_selector = self.build_format_selector(req_format)
2524                 except SyntaxError as err:
2525                     self.report_error(err, tb=False, is_error=False)
2526                     continue
2527
2528             # While in format selection we may need to have an access to the original
2529             # format set in order to calculate some metrics or do some processing.
2530             # For now we need to be able to guess whether original formats provided
2531             # by extractor are incomplete or not (i.e. whether extractor provides only
2532             # video-only or audio-only formats) for proper formats selection for
2533             # extractors with such incomplete formats (see
2534             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2535             # Since formats may be filtered during format selection and may not match
2536             # the original formats the results may be incorrect. Thus original formats
2537             # or pre-calculated metrics should be passed to format selection routines
2538             # as well.
2539             # We will pass a context object containing all necessary additional data
2540             # instead of just formats.
2541             # This fixes incorrect format selection issue (see
2542             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2543             incomplete_formats = (
2544                 # All formats are video-only or
2545                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2546                 # all formats are audio-only
2547                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2548
2549             ctx = {
2550                 'formats': formats,
2551                 'incomplete_formats': incomplete_formats,
2552             }
2553
2554             formats_to_download = list(format_selector(ctx))
2555             if interactive_format_selection and not formats_to_download:
2556                 self.report_error('Requested format is not available', tb=False, is_error=False)
2557                 continue
2558             break
2559
2560         if not formats_to_download:
2561             if not self.params.get('ignore_no_formats_error'):
2562                 raise ExtractorError('Requested format is not available', expected=True,
2563                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2564             self.report_warning('Requested format is not available')
2565             # Process what we can, even without any available formats.
2566             formats_to_download = [{}]
2567
2568         best_format = formats_to_download[-1]
2569         if download:
2570             if best_format:
2571                 self.to_screen(
2572                     f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2573                     + ', '.join([f['format_id'] for f in formats_to_download]))
2574             max_downloads_reached = False
2575             for i, fmt in enumerate(formats_to_download):
2576                 formats_to_download[i] = new_info = dict(info_dict)
2577                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2578                 new_info.update(fmt)
2579                 new_info['__original_infodict'] = info_dict
2580                 try:
2581                     self.process_info(new_info)
2582                 except MaxDownloadsReached:
2583                     max_downloads_reached = True
2584                 new_info.pop('__original_infodict')
2585                 # Remove copied info
2586                 for key, val in tuple(new_info.items()):
2587                     if info_dict.get(key) == val:
2588                         new_info.pop(key)
2589                 if max_downloads_reached:
2590                     break
2591
2592             write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2593             assert write_archive.issubset({True, False, 'ignore'})
2594             if True in write_archive and False not in write_archive:
2595                 self.record_download_archive(info_dict)
2596
2597             info_dict['requested_downloads'] = formats_to_download
2598             info_dict = self.run_all_pps('after_video', info_dict)
2599             if max_downloads_reached:
2600                 raise MaxDownloadsReached()
2601
2602         # We update the info dict with the selected best quality format (backwards compatibility)
2603         info_dict.update(best_format)
2604         return info_dict
2605
2606     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2607         """Select the requested subtitles and their format"""
2608         available_subs = {}
2609         if normal_subtitles and self.params.get('writesubtitles'):
2610             available_subs.update(normal_subtitles)
2611         if automatic_captions and self.params.get('writeautomaticsub'):
2612             for lang, cap_info in automatic_captions.items():
2613                 if lang not in available_subs:
2614                     available_subs[lang] = cap_info
2615
2616         if (not self.params.get('writesubtitles') and not
2617                 self.params.get('writeautomaticsub') or not
2618                 available_subs):
2619             return None
2620
2621         all_sub_langs = available_subs.keys()
2622         if self.params.get('allsubtitles', False):
2623             requested_langs = all_sub_langs
2624         elif self.params.get('subtitleslangs', False):
2625             # A list is used so that the order of languages will be the same as
2626             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2627             requested_langs = []
2628             for lang_re in self.params.get('subtitleslangs'):
2629                 if lang_re == 'all':
2630                     requested_langs.extend(all_sub_langs)
2631                     continue
2632                 discard = lang_re[0] == '-'
2633                 if discard:
2634                     lang_re = lang_re[1:]
2635                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2636                 if discard:
2637                     for lang in current_langs:
2638                         while lang in requested_langs:
2639                             requested_langs.remove(lang)
2640                 else:
2641                     requested_langs.extend(current_langs)
2642             requested_langs = orderedSet(requested_langs)
2643         elif 'en' in available_subs:
2644             requested_langs = ['en']
2645         else:
2646             requested_langs = [list(all_sub_langs)[0]]
2647         if requested_langs:
2648             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2649
2650         formats_query = self.params.get('subtitlesformat', 'best')
2651         formats_preference = formats_query.split('/') if formats_query else []
2652         subs = {}
2653         for lang in requested_langs:
2654             formats = available_subs.get(lang)
2655             if formats is None:
2656                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2657                 continue
2658             for ext in formats_preference:
2659                 if ext == 'best':
2660                     f = formats[-1]
2661                     break
2662                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2663                 if matches:
2664                     f = matches[-1]
2665                     break
2666             else:
2667                 f = formats[-1]
2668                 self.report_warning(
2669                     'No subtitle format found matching "%s" for language %s, '
2670                     'using %s' % (formats_query, lang, f['ext']))
2671             subs[lang] = f
2672         return subs
2673
2674     def _forceprint(self, tmpl, info_dict):
2675         mobj = re.match(r'\w+(=?)$', tmpl)
2676         if mobj and mobj.group(1):
2677             tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2678         elif mobj:
2679             tmpl = '%({})s'.format(tmpl)
2680
2681         info_dict = info_dict.copy()
2682         info_dict['formats_table'] = self.render_formats_table(info_dict)
2683         info_dict['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2684         info_dict['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2685         info_dict['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2686         self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2687
2688     def __forced_printings(self, info_dict, filename, incomplete):
2689         def print_mandatory(field, actual_field=None):
2690             if actual_field is None:
2691                 actual_field = field
2692             if (self.params.get('force%s' % field, False)
2693                     and (not incomplete or info_dict.get(actual_field) is not None)):
2694                 self.to_stdout(info_dict[actual_field])
2695
2696         def print_optional(field):
2697             if (self.params.get('force%s' % field, False)
2698                     and info_dict.get(field) is not None):
2699                 self.to_stdout(info_dict[field])
2700
2701         info_dict = info_dict.copy()
2702         if filename is not None:
2703             info_dict['filename'] = filename
2704         if info_dict.get('requested_formats') is not None:
2705             # For RTMP URLs, also include the playpath
2706             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2707         elif 'url' in info_dict:
2708             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2709
2710         if self.params['forceprint'].get('video') or self.params.get('forcejson'):
2711             self.post_extract(info_dict)
2712         for tmpl in self.params['forceprint'].get('video', []):
2713             self._forceprint(tmpl, info_dict)
2714
2715         print_mandatory('title')
2716         print_mandatory('id')
2717         print_mandatory('url', 'urls')
2718         print_optional('thumbnail')
2719         print_optional('description')
2720         print_optional('filename')
2721         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2722             self.to_stdout(formatSeconds(info_dict['duration']))
2723         print_mandatory('format')
2724
2725         if self.params.get('forcejson'):
2726             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2727
2728     def dl(self, name, info, subtitle=False, test=False):
2729         if not info.get('url'):
2730             self.raise_no_formats(info, True)
2731
2732         if test:
2733             verbose = self.params.get('verbose')
2734             params = {
2735                 'test': True,
2736                 'quiet': self.params.get('quiet') or not verbose,
2737                 'verbose': verbose,
2738                 'noprogress': not verbose,
2739                 'nopart': True,
2740                 'skip_unavailable_fragments': False,
2741                 'keep_fragments': False,
2742                 'overwrites': True,
2743                 '_no_ytdl_file': True,
2744             }
2745         else:
2746             params = self.params
2747         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2748         if not test:
2749             for ph in self._progress_hooks:
2750                 fd.add_progress_hook(ph)
2751             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2752             self.write_debug('Invoking downloader on "%s"' % urls)
2753
2754         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2755         # But it may contain objects that are not deep-copyable
2756         new_info = self._copy_infodict(info)
2757         if new_info.get('http_headers') is None:
2758             new_info['http_headers'] = self._calc_headers(new_info)
2759         return fd.download(name, new_info, subtitle)
2760
2761     def existing_file(self, filepaths, *, default_overwrite=True):
2762         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2763         if existing_files and not self.params.get('overwrites', default_overwrite):
2764             return existing_files[0]
2765
2766         for file in existing_files:
2767             self.report_file_delete(file)
2768             os.remove(file)
2769         return None
2770
2771     def process_info(self, info_dict):
2772         """Process a single resolved IE result. (Modified it in-place)"""
2773
2774         assert info_dict.get('_type', 'video') == 'video'
2775         original_infodict = info_dict
2776
2777         if 'format' not in info_dict and 'ext' in info_dict:
2778             info_dict['format'] = info_dict['ext']
2779
2780         if self._match_entry(info_dict) is not None:
2781             info_dict['__write_download_archive'] = 'ignore'
2782             return
2783
2784         self.post_extract(info_dict)
2785         self._num_downloads += 1
2786
2787         # info_dict['_filename'] needs to be set for backward compatibility
2788         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2789         temp_filename = self.prepare_filename(info_dict, 'temp')
2790         files_to_move = {}
2791
2792         # Forced printings
2793         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2794
2795         if self.params.get('simulate'):
2796             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2797             return
2798
2799         if full_filename is None:
2800             return
2801         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2802             return
2803         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2804             return
2805
2806         if self._write_description('video', info_dict,
2807                                    self.prepare_filename(info_dict, 'description')) is None:
2808             return
2809
2810         sub_files = self._write_subtitles(info_dict, temp_filename)
2811         if sub_files is None:
2812             return
2813         files_to_move.update(dict(sub_files))
2814
2815         thumb_files = self._write_thumbnails(
2816             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2817         if thumb_files is None:
2818             return
2819         files_to_move.update(dict(thumb_files))
2820
2821         infofn = self.prepare_filename(info_dict, 'infojson')
2822         _infojson_written = self._write_info_json('video', info_dict, infofn)
2823         if _infojson_written:
2824             info_dict['infojson_filename'] = infofn
2825             # For backward compatibility, even though it was a private field
2826             info_dict['__infojson_filename'] = infofn
2827         elif _infojson_written is None:
2828             return
2829
2830         # Note: Annotations are deprecated
2831         annofn = None
2832         if self.params.get('writeannotations', False):
2833             annofn = self.prepare_filename(info_dict, 'annotation')
2834         if annofn:
2835             if not self._ensure_dir_exists(encodeFilename(annofn)):
2836                 return
2837             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2838                 self.to_screen('[info] Video annotations are already present')
2839             elif not info_dict.get('annotations'):
2840                 self.report_warning('There are no annotations to write.')
2841             else:
2842                 try:
2843                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2844                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2845                         annofile.write(info_dict['annotations'])
2846                 except (KeyError, TypeError):
2847                     self.report_warning('There are no annotations to write.')
2848                 except (OSError, IOError):
2849                     self.report_error('Cannot write annotations file: ' + annofn)
2850                     return
2851
2852         # Write internet shortcut files
2853         def _write_link_file(link_type):
2854             if 'webpage_url' not in info_dict:
2855                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2856                 return False
2857             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2858             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2859                 return False
2860             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2861                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2862                 return True
2863             try:
2864                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2865                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2866                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2867                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2868                     if link_type == 'desktop':
2869                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2870                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2871             except (OSError, IOError):
2872                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2873                 return False
2874             return True
2875
2876         write_links = {
2877             'url': self.params.get('writeurllink'),
2878             'webloc': self.params.get('writewebloclink'),
2879             'desktop': self.params.get('writedesktoplink'),
2880         }
2881         if self.params.get('writelink'):
2882             link_type = ('webloc' if sys.platform == 'darwin'
2883                          else 'desktop' if sys.platform.startswith('linux')
2884                          else 'url')
2885             write_links[link_type] = True
2886
2887         if any(should_write and not _write_link_file(link_type)
2888                for link_type, should_write in write_links.items()):
2889             return
2890
2891         def replace_info_dict(new_info):
2892             nonlocal info_dict
2893             if new_info == info_dict:
2894                 return
2895             info_dict.clear()
2896             info_dict.update(new_info)
2897
2898         try:
2899             new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2900             replace_info_dict(new_info)
2901         except PostProcessingError as err:
2902             self.report_error('Preprocessing: %s' % str(err))
2903             return
2904
2905         if self.params.get('skip_download'):
2906             info_dict['filepath'] = temp_filename
2907             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2908             info_dict['__files_to_move'] = files_to_move
2909             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2910             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2911         else:
2912             # Download
2913             info_dict.setdefault('__postprocessors', [])
2914             try:
2915
2916                 def existing_video_file(*filepaths):
2917                     ext = info_dict.get('ext')
2918                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
2919                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
2920                                               default_overwrite=False)
2921                     if file:
2922                         info_dict['ext'] = os.path.splitext(file)[1][1:]
2923                     return file
2924
2925                 success = True
2926                 if info_dict.get('requested_formats') is not None:
2927
2928                     def compatible_formats(formats):
2929                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2930                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2931                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2932                         if len(video_formats) > 2 or len(audio_formats) > 2:
2933                             return False
2934
2935                         # Check extension
2936                         exts = set(format.get('ext') for format in formats)
2937                         COMPATIBLE_EXTS = (
2938                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2939                             set(('webm',)),
2940                         )
2941                         for ext_sets in COMPATIBLE_EXTS:
2942                             if ext_sets.issuperset(exts):
2943                                 return True
2944                         # TODO: Check acodec/vcodec
2945                         return False
2946
2947                     requested_formats = info_dict['requested_formats']
2948                     old_ext = info_dict['ext']
2949                     if self.params.get('merge_output_format') is None:
2950                         if not compatible_formats(requested_formats):
2951                             info_dict['ext'] = 'mkv'
2952                             self.report_warning(
2953                                 'Requested formats are incompatible for merge and will be merged into mkv')
2954                         if (info_dict['ext'] == 'webm'
2955                                 and info_dict.get('thumbnails')
2956                                 # check with type instead of pp_key, __name__, or isinstance
2957                                 # since we dont want any custom PPs to trigger this
2958                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2959                             info_dict['ext'] = 'mkv'
2960                             self.report_warning(
2961                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2962                     new_ext = info_dict['ext']
2963
2964                     def correct_ext(filename, ext=new_ext):
2965                         if filename == '-':
2966                             return filename
2967                         filename_real_ext = os.path.splitext(filename)[1][1:]
2968                         filename_wo_ext = (
2969                             os.path.splitext(filename)[0]
2970                             if filename_real_ext in (old_ext, new_ext)
2971                             else filename)
2972                         return '%s.%s' % (filename_wo_ext, ext)
2973
2974                     # Ensure filename always has a correct extension for successful merge
2975                     full_filename = correct_ext(full_filename)
2976                     temp_filename = correct_ext(temp_filename)
2977                     dl_filename = existing_video_file(full_filename, temp_filename)
2978                     info_dict['__real_download'] = False
2979
2980                     downloaded = []
2981                     merger = FFmpegMergerPP(self)
2982
2983                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
2984                     if dl_filename is not None:
2985                         self.report_file_already_downloaded(dl_filename)
2986                     elif fd:
2987                         for f in requested_formats if fd != FFmpegFD else []:
2988                             f['filepath'] = fname = prepend_extension(
2989                                 correct_ext(temp_filename, info_dict['ext']),
2990                                 'f%s' % f['format_id'], info_dict['ext'])
2991                             downloaded.append(fname)
2992                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2993                         success, real_download = self.dl(temp_filename, info_dict)
2994                         info_dict['__real_download'] = real_download
2995                     else:
2996                         if self.params.get('allow_unplayable_formats'):
2997                             self.report_warning(
2998                                 'You have requested merging of multiple formats '
2999                                 'while also allowing unplayable formats to be downloaded. '
3000                                 'The formats won\'t be merged to prevent data corruption.')
3001                         elif not merger.available:
3002                             self.report_warning(
3003                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
3004                                 'The formats won\'t be merged.')
3005
3006                         if temp_filename == '-':
3007                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3008                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3009                                       else 'but ffmpeg is not installed')
3010                             self.report_warning(
3011                                 f'You have requested downloading multiple formats to stdout {reason}. '
3012                                 'The formats will be streamed one after the other')
3013                             fname = temp_filename
3014                         for f in requested_formats:
3015                             new_info = dict(info_dict)
3016                             del new_info['requested_formats']
3017                             new_info.update(f)
3018                             if temp_filename != '-':
3019                                 fname = prepend_extension(
3020                                     correct_ext(temp_filename, new_info['ext']),
3021                                     'f%s' % f['format_id'], new_info['ext'])
3022                                 if not self._ensure_dir_exists(fname):
3023                                     return
3024                                 f['filepath'] = fname
3025                                 downloaded.append(fname)
3026                             partial_success, real_download = self.dl(fname, new_info)
3027                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3028                             success = success and partial_success
3029
3030                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3031                         info_dict['__postprocessors'].append(merger)
3032                         info_dict['__files_to_merge'] = downloaded
3033                         # Even if there were no downloads, it is being merged only now
3034                         info_dict['__real_download'] = True
3035                     else:
3036                         for file in downloaded:
3037                             files_to_move[file] = None
3038                 else:
3039                     # Just a single file
3040                     dl_filename = existing_video_file(full_filename, temp_filename)
3041                     if dl_filename is None or dl_filename == temp_filename:
3042                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3043                         # So we should try to resume the download
3044                         success, real_download = self.dl(temp_filename, info_dict)
3045                         info_dict['__real_download'] = real_download
3046                     else:
3047                         self.report_file_already_downloaded(dl_filename)
3048
3049                 dl_filename = dl_filename or temp_filename
3050                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3051
3052             except network_exceptions as err:
3053                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3054                 return
3055             except (OSError, IOError) as err:
3056                 raise UnavailableVideoError(err)
3057             except (ContentTooShortError, ) as err:
3058                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3059                 return
3060
3061             if success and full_filename != '-':
3062
3063                 def fixup():
3064                     do_fixup = True
3065                     fixup_policy = self.params.get('fixup')
3066                     vid = info_dict['id']
3067
3068                     if fixup_policy in ('ignore', 'never'):
3069                         return
3070                     elif fixup_policy == 'warn':
3071                         do_fixup = False
3072                     elif fixup_policy != 'force':
3073                         assert fixup_policy in ('detect_or_warn', None)
3074                         if not info_dict.get('__real_download'):
3075                             do_fixup = False
3076
3077                     def ffmpeg_fixup(cndn, msg, cls):
3078                         if not cndn:
3079                             return
3080                         if not do_fixup:
3081                             self.report_warning(f'{vid}: {msg}')
3082                             return
3083                         pp = cls(self)
3084                         if pp.available:
3085                             info_dict['__postprocessors'].append(pp)
3086                         else:
3087                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3088
3089                     stretched_ratio = info_dict.get('stretched_ratio')
3090                     ffmpeg_fixup(
3091                         stretched_ratio not in (1, None),
3092                         f'Non-uniform pixel ratio {stretched_ratio}',
3093                         FFmpegFixupStretchedPP)
3094
3095                     ffmpeg_fixup(
3096                         (info_dict.get('requested_formats') is None
3097                          and info_dict.get('container') == 'm4a_dash'
3098                          and info_dict.get('ext') == 'm4a'),
3099                         'writing DASH m4a. Only some players support this container',
3100                         FFmpegFixupM4aPP)
3101
3102                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3103                     downloader = downloader.__name__ if downloader else None
3104
3105                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3106                         ffmpeg_fixup(downloader == 'HlsFD',
3107                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3108                                      FFmpegFixupM3u8PP)
3109                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3110                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3111
3112                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3113                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3114
3115                 fixup()
3116                 try:
3117                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3118                 except PostProcessingError as err:
3119                     self.report_error('Postprocessing: %s' % str(err))
3120                     return
3121                 try:
3122                     for ph in self._post_hooks:
3123                         ph(info_dict['filepath'])
3124                 except Exception as err:
3125                     self.report_error('post hooks: %s' % str(err))
3126                     return
3127                 info_dict['__write_download_archive'] = True
3128
3129         if self.params.get('force_write_download_archive'):
3130             info_dict['__write_download_archive'] = True
3131
3132         # Make sure the info_dict was modified in-place
3133         assert info_dict is original_infodict
3134
3135         max_downloads = self.params.get('max_downloads')
3136         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3137             raise MaxDownloadsReached()
3138
3139     def __download_wrapper(self, func):
3140         @functools.wraps(func)
3141         def wrapper(*args, **kwargs):
3142             try:
3143                 res = func(*args, **kwargs)
3144             except UnavailableVideoError as e:
3145                 self.report_error(e)
3146             except MaxDownloadsReached as e:
3147                 self.to_screen(f'[info] {e}')
3148                 raise
3149             except DownloadCancelled as e:
3150                 self.to_screen(f'[info] {e}')
3151                 if not self.params.get('break_per_url'):
3152                     raise
3153             else:
3154                 if self.params.get('dump_single_json', False):
3155                     self.post_extract(res)
3156                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3157         return wrapper
3158
3159     def download(self, url_list):
3160         """Download a given list of URLs."""
3161         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3162         outtmpl = self.outtmpl_dict['default']
3163         if (len(url_list) > 1
3164                 and outtmpl != '-'
3165                 and '%' not in outtmpl
3166                 and self.params.get('max_downloads') != 1):
3167             raise SameFileError(outtmpl)
3168
3169         for url in url_list:
3170             self.__download_wrapper(self.extract_info)(
3171                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3172
3173         return self._download_retcode
3174
3175     def download_with_info_file(self, info_filename):
3176         with contextlib.closing(fileinput.FileInput(
3177                 [info_filename], mode='r',
3178                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3179             # FileInput doesn't have a read method, we can't call json.load
3180             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3181         try:
3182             self.__download_wrapper(self.process_ie_result)(info, download=True)
3183         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3184             if not isinstance(e, EntryNotInPlaylist):
3185                 self.to_stderr('\r')
3186             webpage_url = info.get('webpage_url')
3187             if webpage_url is not None:
3188                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3189                 return self.download([webpage_url])
3190             else:
3191                 raise
3192         return self._download_retcode
3193
3194     @staticmethod
3195     def sanitize_info(info_dict, remove_private_keys=False):
3196         ''' Sanitize the infodict for converting to json '''
3197         if info_dict is None:
3198             return info_dict
3199         info_dict.setdefault('epoch', int(time.time()))
3200         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3201         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3202         if remove_private_keys:
3203             remove_keys |= {
3204                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3205                 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3206             }
3207             reject = lambda k, v: k not in keep_keys and (
3208                 k.startswith('_') or k in remove_keys or v is None)
3209         else:
3210             reject = lambda k, v: k in remove_keys
3211
3212         def filter_fn(obj):
3213             if isinstance(obj, dict):
3214                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3215             elif isinstance(obj, (list, tuple, set, LazyList)):
3216                 return list(map(filter_fn, obj))
3217             elif obj is None or isinstance(obj, (str, int, float, bool)):
3218                 return obj
3219             else:
3220                 return repr(obj)
3221
3222         return filter_fn(info_dict)
3223
3224     @staticmethod
3225     def filter_requested_info(info_dict, actually_filter=True):
3226         ''' Alias of sanitize_info for backward compatibility '''
3227         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3228
3229     @staticmethod
3230     def post_extract(info_dict):
3231         def actual_post_extract(info_dict):
3232             if info_dict.get('_type') in ('playlist', 'multi_video'):
3233                 for video_dict in info_dict.get('entries', {}):
3234                     actual_post_extract(video_dict or {})
3235                 return
3236
3237             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3238             extra = post_extractor().items()
3239             info_dict.update(extra)
3240             info_dict.pop('__post_extractor', None)
3241
3242             original_infodict = info_dict.get('__original_infodict') or {}
3243             original_infodict.update(extra)
3244             original_infodict.pop('__post_extractor', None)
3245
3246         actual_post_extract(info_dict or {})
3247
3248     def run_pp(self, pp, infodict):
3249         files_to_delete = []
3250         if '__files_to_move' not in infodict:
3251             infodict['__files_to_move'] = {}
3252         try:
3253             files_to_delete, infodict = pp.run(infodict)
3254         except PostProcessingError as e:
3255             # Must be True and not 'only_download'
3256             if self.params.get('ignoreerrors') is True:
3257                 self.report_error(e)
3258                 return infodict
3259             raise
3260
3261         if not files_to_delete:
3262             return infodict
3263         if self.params.get('keepvideo', False):
3264             for f in files_to_delete:
3265                 infodict['__files_to_move'].setdefault(f, '')
3266         else:
3267             for old_filename in set(files_to_delete):
3268                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3269                 try:
3270                     os.remove(encodeFilename(old_filename))
3271                 except (IOError, OSError):
3272                     self.report_warning('Unable to remove downloaded original file')
3273                 if old_filename in infodict['__files_to_move']:
3274                     del infodict['__files_to_move'][old_filename]
3275         return infodict
3276
3277     def run_all_pps(self, key, info, *, additional_pps=None):
3278         for tmpl in self.params['forceprint'].get(key, []):
3279             self._forceprint(tmpl, info)
3280         for pp in (additional_pps or []) + self._pps[key]:
3281             info = self.run_pp(pp, info)
3282         return info
3283
3284     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3285         info = dict(ie_info)
3286         info['__files_to_move'] = files_to_move or {}
3287         info = self.run_all_pps(key, info)
3288         return info, info.pop('__files_to_move', None)
3289
3290     def post_process(self, filename, info, files_to_move=None):
3291         """Run all the postprocessors on the given file."""
3292         info['filepath'] = filename
3293         info['__files_to_move'] = files_to_move or {}
3294         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3295         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3296         del info['__files_to_move']
3297         return self.run_all_pps('after_move', info)
3298
3299     def _make_archive_id(self, info_dict):
3300         video_id = info_dict.get('id')
3301         if not video_id:
3302             return
3303         # Future-proof against any change in case
3304         # and backwards compatibility with prior versions
3305         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3306         if extractor is None:
3307             url = str_or_none(info_dict.get('url'))
3308             if not url:
3309                 return
3310             # Try to find matching extractor for the URL and take its ie_key
3311             for ie_key, ie in self._ies.items():
3312                 if ie.suitable(url):
3313                     extractor = ie_key
3314                     break
3315             else:
3316                 return
3317         return '%s %s' % (extractor.lower(), video_id)
3318
3319     def in_download_archive(self, info_dict):
3320         fn = self.params.get('download_archive')
3321         if fn is None:
3322             return False
3323
3324         vid_id = self._make_archive_id(info_dict)
3325         if not vid_id:
3326             return False  # Incomplete video information
3327
3328         return vid_id in self.archive
3329
3330     def record_download_archive(self, info_dict):
3331         fn = self.params.get('download_archive')
3332         if fn is None:
3333             return
3334         vid_id = self._make_archive_id(info_dict)
3335         assert vid_id
3336         self.write_debug(f'Adding to archive: {vid_id}')
3337         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3338             archive_file.write(vid_id + '\n')
3339         self.archive.add(vid_id)
3340
3341     @staticmethod
3342     def format_resolution(format, default='unknown'):
3343         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3344             return 'audio only'
3345         if format.get('resolution') is not None:
3346             return format['resolution']
3347         if format.get('width') and format.get('height'):
3348             return '%dx%d' % (format['width'], format['height'])
3349         elif format.get('height'):
3350             return '%sp' % format['height']
3351         elif format.get('width'):
3352             return '%dx?' % format['width']
3353         return default
3354
3355     def _list_format_headers(self, *headers):
3356         if self.params.get('listformats_table', True) is not False:
3357             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3358         return headers
3359
3360     def _format_note(self, fdict):
3361         res = ''
3362         if fdict.get('ext') in ['f4f', 'f4m']:
3363             res += '(unsupported)'
3364         if fdict.get('language'):
3365             if res:
3366                 res += ' '
3367             res += '[%s]' % fdict['language']
3368         if fdict.get('format_note') is not None:
3369             if res:
3370                 res += ' '
3371             res += fdict['format_note']
3372         if fdict.get('tbr') is not None:
3373             if res:
3374                 res += ', '
3375             res += '%4dk' % fdict['tbr']
3376         if fdict.get('container') is not None:
3377             if res:
3378                 res += ', '
3379             res += '%s container' % fdict['container']
3380         if (fdict.get('vcodec') is not None
3381                 and fdict.get('vcodec') != 'none'):
3382             if res:
3383                 res += ', '
3384             res += fdict['vcodec']
3385             if fdict.get('vbr') is not None:
3386                 res += '@'
3387         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3388             res += 'video@'
3389         if fdict.get('vbr') is not None:
3390             res += '%4dk' % fdict['vbr']
3391         if fdict.get('fps') is not None:
3392             if res:
3393                 res += ', '
3394             res += '%sfps' % fdict['fps']
3395         if fdict.get('acodec') is not None:
3396             if res:
3397                 res += ', '
3398             if fdict['acodec'] == 'none':
3399                 res += 'video only'
3400             else:
3401                 res += '%-5s' % fdict['acodec']
3402         elif fdict.get('abr') is not None:
3403             if res:
3404                 res += ', '
3405             res += 'audio'
3406         if fdict.get('abr') is not None:
3407             res += '@%3dk' % fdict['abr']
3408         if fdict.get('asr') is not None:
3409             res += ' (%5dHz)' % fdict['asr']
3410         if fdict.get('filesize') is not None:
3411             if res:
3412                 res += ', '
3413             res += format_bytes(fdict['filesize'])
3414         elif fdict.get('filesize_approx') is not None:
3415             if res:
3416                 res += ', '
3417             res += '~' + format_bytes(fdict['filesize_approx'])
3418         return res
3419
3420     def render_formats_table(self, info_dict):
3421         if not info_dict.get('formats') and not info_dict.get('url'):
3422             return None
3423
3424         formats = info_dict.get('formats', [info_dict])
3425         if not self.params.get('listformats_table', True) is not False:
3426             table = [
3427                 [
3428                     format_field(f, 'format_id'),
3429                     format_field(f, 'ext'),
3430                     self.format_resolution(f),
3431                     self._format_note(f)
3432                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3433             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3434
3435         delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3436         table = [
3437             [
3438                 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3439                 format_field(f, 'ext'),
3440                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3441                 format_field(f, 'fps', '\t%d'),
3442                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3443                 delim,
3444                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3445                 format_field(f, 'tbr', '\t%dk'),
3446                 shorten_protocol_name(f.get('protocol', '')),
3447                 delim,
3448                 format_field(f, 'vcodec', default='unknown').replace(
3449                     'none', 'images' if f.get('acodec') == 'none'
3450                             else self._format_screen('audio only', self.Styles.SUPPRESS)),
3451                 format_field(f, 'vbr', '\t%dk'),
3452                 format_field(f, 'acodec', default='unknown').replace(
3453                     'none', '' if f.get('vcodec') == 'none'
3454                             else self._format_screen('video only', self.Styles.SUPPRESS)),
3455                 format_field(f, 'abr', '\t%dk'),
3456                 format_field(f, 'asr', '\t%dHz'),
3457                 join_nonempty(
3458                     self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3459                     format_field(f, 'language', '[%s]'),
3460                     join_nonempty(format_field(f, 'format_note'),
3461                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3462                                   delim=', '),
3463                     delim=' '),
3464             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3465         header_line = self._list_format_headers(
3466             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3467             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3468
3469         return render_table(
3470             header_line, table, hide_empty=True,
3471             delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3472
3473     def render_thumbnails_table(self, info_dict):
3474         thumbnails = list(info_dict.get('thumbnails'))
3475         if not thumbnails:
3476             return None
3477         return render_table(
3478             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3479             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3480
3481     def render_subtitles_table(self, video_id, subtitles):
3482         def _row(lang, formats):
3483             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3484             if len(set(names)) == 1:
3485                 names = [] if names[0] == 'unknown' else names[:1]
3486             return [lang, ', '.join(names), ', '.join(exts)]
3487
3488         if not subtitles:
3489             return None
3490         return render_table(
3491             self._list_format_headers('Language', 'Name', 'Formats'),
3492             [_row(lang, formats) for lang, formats in subtitles.items()],
3493             hide_empty=True)
3494
3495     def __list_table(self, video_id, name, func, *args):
3496         table = func(*args)
3497         if not table:
3498             self.to_screen(f'{video_id} has no {name}')
3499             return
3500         self.to_screen(f'[info] Available {name} for {video_id}:')
3501         self.to_stdout(table)
3502
3503     def list_formats(self, info_dict):
3504         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3505
3506     def list_thumbnails(self, info_dict):
3507         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3508
3509     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3510         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3511
3512     def urlopen(self, req):
3513         """ Start an HTTP download """
3514         if isinstance(req, compat_basestring):
3515             req = sanitized_Request(req)
3516         return self._opener.open(req, timeout=self._socket_timeout)
3517
3518     def print_debug_header(self):
3519         if not self.params.get('verbose'):
3520             return
3521
3522         def get_encoding(stream):
3523             ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3524             if not supports_terminal_sequences(stream):
3525                 from .compat import WINDOWS_VT_MODE
3526                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3527             return ret
3528
3529         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3530             locale.getpreferredencoding(),
3531             sys.getfilesystemencoding(),
3532             get_encoding(self._screen_file), get_encoding(self._err_file),
3533             self.get_encoding())
3534
3535         logger = self.params.get('logger')
3536         if logger:
3537             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3538             write_debug(encoding_str)
3539         else:
3540             write_string(f'[debug] {encoding_str}\n', encoding=None)
3541             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3542
3543         source = detect_variant()
3544         write_debug(join_nonempty(
3545             'yt-dlp version', __version__,
3546             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3547             '' if source == 'unknown' else f'({source})',
3548             delim=' '))
3549         if not _LAZY_LOADER:
3550             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3551                 write_debug('Lazy loading extractors is forcibly disabled')
3552             else:
3553                 write_debug('Lazy loading extractors is disabled')
3554         if plugin_extractors or plugin_postprocessors:
3555             write_debug('Plugins: %s' % [
3556                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3557                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3558         if self.params.get('compat_opts'):
3559             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3560
3561         if source == 'source':
3562             try:
3563                 sp = Popen(
3564                     ['git', 'rev-parse', '--short', 'HEAD'],
3565                     stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3566                     cwd=os.path.dirname(os.path.abspath(__file__)))
3567                 out, err = sp.communicate_or_kill()
3568                 out = out.decode().strip()
3569                 if re.match('[0-9a-f]+', out):
3570                     write_debug('Git HEAD: %s' % out)
3571             except Exception:
3572                 try:
3573                     sys.exc_clear()
3574                 except Exception:
3575                     pass
3576
3577         def python_implementation():
3578             impl_name = platform.python_implementation()
3579             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3580                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3581             return impl_name
3582
3583         write_debug('Python version %s (%s %s) - %s' % (
3584             platform.python_version(),
3585             python_implementation(),
3586             platform.architecture()[0],
3587             platform_name()))
3588
3589         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3590         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3591         if ffmpeg_features:
3592             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3593
3594         exe_versions['rtmpdump'] = rtmpdump_version()
3595         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3596         exe_str = ', '.join(
3597             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3598         ) or 'none'
3599         write_debug('exe versions: %s' % exe_str)
3600
3601         from .downloader.websocket import has_websockets
3602         from .postprocessor.embedthumbnail import has_mutagen
3603         from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
3604
3605         lib_str = join_nonempty(
3606             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3607             SECRETSTORAGE_AVAILABLE and 'secretstorage',
3608             has_mutagen and 'mutagen',
3609             SQLITE_AVAILABLE and 'sqlite',
3610             has_websockets and 'websockets',
3611             delim=', ') or 'none'
3612         write_debug('Optional libraries: %s' % lib_str)
3613
3614         proxy_map = {}
3615         for handler in self._opener.handlers:
3616             if hasattr(handler, 'proxies'):
3617                 proxy_map.update(handler.proxies)
3618         write_debug(f'Proxy map: {proxy_map}')
3619
3620         # Not implemented
3621         if False and self.params.get('call_home'):
3622             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3623             write_debug('Public IP address: %s' % ipaddr)
3624             latest_version = self.urlopen(
3625                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3626             if version_tuple(latest_version) > version_tuple(__version__):
3627                 self.report_warning(
3628                     'You are using an outdated version (newest version: %s)! '
3629                     'See https://yt-dl.org/update if you need help updating.' %
3630                     latest_version)
3631
3632     def _setup_opener(self):
3633         timeout_val = self.params.get('socket_timeout')
3634         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3635
3636         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3637         opts_cookiefile = self.params.get('cookiefile')
3638         opts_proxy = self.params.get('proxy')
3639
3640         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3641
3642         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3643         if opts_proxy is not None:
3644             if opts_proxy == '':
3645                 proxies = {}
3646             else:
3647                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3648         else:
3649             proxies = compat_urllib_request.getproxies()
3650             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3651             if 'http' in proxies and 'https' not in proxies:
3652                 proxies['https'] = proxies['http']
3653         proxy_handler = PerRequestProxyHandler(proxies)
3654
3655         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3656         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3657         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3658         redirect_handler = YoutubeDLRedirectHandler()
3659         data_handler = compat_urllib_request_DataHandler()
3660
3661         # When passing our own FileHandler instance, build_opener won't add the
3662         # default FileHandler and allows us to disable the file protocol, which
3663         # can be used for malicious purposes (see
3664         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3665         file_handler = compat_urllib_request.FileHandler()
3666
3667         def file_open(*args, **kwargs):
3668             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3669         file_handler.file_open = file_open
3670
3671         opener = compat_urllib_request.build_opener(
3672             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3673
3674         # Delete the default user-agent header, which would otherwise apply in
3675         # cases where our custom HTTP handler doesn't come into play
3676         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3677         opener.addheaders = []
3678         self._opener = opener
3679
3680     def encode(self, s):
3681         if isinstance(s, bytes):
3682             return s  # Already encoded
3683
3684         try:
3685             return s.encode(self.get_encoding())
3686         except UnicodeEncodeError as err:
3687             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3688             raise
3689
3690     def get_encoding(self):
3691         encoding = self.params.get('encoding')
3692         if encoding is None:
3693             encoding = preferredencoding()
3694         return encoding
3695
3696     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3697         ''' Write infojson and returns True = written, False = skip, None = error '''
3698         if overwrite is None:
3699             overwrite = self.params.get('overwrites', True)
3700         if not self.params.get('writeinfojson'):
3701             return False
3702         elif not infofn:
3703             self.write_debug(f'Skipping writing {label} infojson')
3704             return False
3705         elif not self._ensure_dir_exists(infofn):
3706             return None
3707         elif not overwrite and os.path.exists(infofn):
3708             self.to_screen(f'[info] {label.title()} metadata is already present')
3709         else:
3710             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3711             try:
3712                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3713             except (OSError, IOError):
3714                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3715                 return None
3716         return True
3717
3718     def _write_description(self, label, ie_result, descfn):
3719         ''' Write description and returns True = written, False = skip, None = error '''
3720         if not self.params.get('writedescription'):
3721             return False
3722         elif not descfn:
3723             self.write_debug(f'Skipping writing {label} description')
3724             return False
3725         elif not self._ensure_dir_exists(descfn):
3726             return None
3727         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3728             self.to_screen(f'[info] {label.title()} description is already present')
3729         elif ie_result.get('description') is None:
3730             self.report_warning(f'There\'s no {label} description to write')
3731             return False
3732         else:
3733             try:
3734                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3735                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3736                     descfile.write(ie_result['description'])
3737             except (OSError, IOError):
3738                 self.report_error(f'Cannot write {label} description file {descfn}')
3739                 return None
3740         return True
3741
3742     def _write_subtitles(self, info_dict, filename):
3743         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3744         ret = []
3745         subtitles = info_dict.get('requested_subtitles')
3746         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3747             # subtitles download errors are already managed as troubles in relevant IE
3748             # that way it will silently go on when used with unsupporting IE
3749             return ret
3750
3751         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3752         if not sub_filename_base:
3753             self.to_screen('[info] Skipping writing video subtitles')
3754             return ret
3755         for sub_lang, sub_info in subtitles.items():
3756             sub_format = sub_info['ext']
3757             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3758             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3759             existing_sub = self.existing_file((sub_filename_final, sub_filename))
3760             if existing_sub:
3761                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3762                 sub_info['filepath'] = existing_sub
3763                 ret.append((existing_sub, sub_filename_final))
3764                 continue
3765
3766             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3767             if sub_info.get('data') is not None:
3768                 try:
3769                     # Use newline='' to prevent conversion of newline characters
3770                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3771                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3772                         subfile.write(sub_info['data'])
3773                     sub_info['filepath'] = sub_filename
3774                     ret.append((sub_filename, sub_filename_final))
3775                     continue
3776                 except (OSError, IOError):
3777                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3778                     return None
3779
3780             try:
3781                 sub_copy = sub_info.copy()
3782                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3783                 self.dl(sub_filename, sub_copy, subtitle=True)
3784                 sub_info['filepath'] = sub_filename
3785                 ret.append((sub_filename, sub_filename_final))
3786             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3787                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3788                     raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
3789                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3790         return ret
3791
3792     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3793         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3794         write_all = self.params.get('write_all_thumbnails', False)
3795         thumbnails, ret = [], []
3796         if write_all or self.params.get('writethumbnail', False):
3797             thumbnails = info_dict.get('thumbnails') or []
3798         multiple = write_all and len(thumbnails) > 1
3799
3800         if thumb_filename_base is None:
3801             thumb_filename_base = filename
3802         if thumbnails and not thumb_filename_base:
3803             self.write_debug(f'Skipping writing {label} thumbnail')
3804             return ret
3805
3806         for idx, t in list(enumerate(thumbnails))[::-1]:
3807             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3808             thumb_display_id = f'{label} thumbnail {t["id"]}'
3809             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3810             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3811
3812             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3813             if existing_thumb:
3814                 self.to_screen('[info] %s is already present' % (
3815                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3816                 t['filepath'] = existing_thumb
3817                 ret.append((existing_thumb, thumb_filename_final))
3818             else:
3819                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3820                 try:
3821                     uf = self.urlopen(t['url'])
3822                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3823                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3824                         shutil.copyfileobj(uf, thumbf)
3825                     ret.append((thumb_filename, thumb_filename_final))
3826                     t['filepath'] = thumb_filename
3827                 except network_exceptions as err:
3828                     thumbnails.pop(idx)
3829                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3830             if ret and not write_all:
3831                 break
3832         return ret