yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_pycrypto_AES,
  40     compat_shlex_quote,
  41     compat_str,
  42     compat_tokenize_tokenize,
  43     compat_urllib_error,
  44     compat_urllib_request,
  45     compat_urllib_request_DataHandler,
  46     windows_enable_vt_mode,
  47 )
  48 from .cookies import load_cookies
  49 from .utils import (
  50     age_restricted,
  51     args_to_str,
  52     ContentTooShortError,
  53     date_from_str,
  54     DateRange,
  55     DEFAULT_OUTTMPL,
  56     determine_ext,
  57     determine_protocol,
  58     DownloadCancelled,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     format_decimal_suffix,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     get_domain,
  74     HEADRequest,
  75     int_or_none,
  76     iri_to_uri,
  77     ISO3166Utils,
  78     join_nonempty,
  79     LazyList,
  80     LINK_TEMPLATES,
  81     locked_file,
  82     make_dir,
  83     make_HTTPS_handler,
  84     MaxDownloadsReached,
  85     network_exceptions,
  86     number_of_digits,
  87     orderedSet,
  88     OUTTMPL_TYPES,
  89     PagedList,
  90     parse_filesize,
  91     PerRequestProxyHandler,
  92     platform_name,
  93     Popen,
  94     POSTPROCESS_WHEN,
  95     PostProcessingError,
  96     preferredencoding,
  97     prepend_extension,
  98     ReExtractInfo,
  99     register_socks_protocols,
 100     RejectedVideoReached,
 101     remove_terminal_sequences,
 102     render_table,
 103     replace_extension,
 104     SameFileError,
 105     sanitize_filename,
 106     sanitize_path,
 107     sanitize_url,
 108     sanitized_Request,
 109     std_headers,
 110     STR_FORMAT_RE_TMPL,
 111     STR_FORMAT_TYPES,
 112     str_or_none,
 113     strftime_or_none,
 114     subtitles_filename,
 115     supports_terminal_sequences,
 116     timetuple_from_msec,
 117     to_high_limit_path,
 118     traverse_obj,
 119     try_get,
 120     UnavailableVideoError,
 121     url_basename,
 122     variadic,
 123     version_tuple,
 124     write_json_file,
 125     write_string,
 126     YoutubeDLCookieProcessor,
 127     YoutubeDLHandler,
 128     YoutubeDLRedirectHandler,
 129 )
 130 from .cache import Cache
 131 from .minicurses import format_text
 132 from .extractor import (
 133     gen_extractor_classes,
 134     get_info_extractor,
 135     _LAZY_LOADER,
 136     _PLUGIN_CLASSES as plugin_extractors
 137 )
 138 from .extractor.openload import PhantomJSwrapper
 139 from .downloader import (
 140     FFmpegFD,
 141     get_suitable_downloader,
 142     shorten_protocol_name
 143 )
 144 from .downloader.rtmp import rtmpdump_version
 145 from .postprocessor import (
 146     get_postprocessor,
 147     EmbedThumbnailPP,
 148     FFmpegFixupDuplicateMoovPP,
 149     FFmpegFixupDurationPP,
 150     FFmpegFixupM3u8PP,
 151     FFmpegFixupM4aPP,
 152     FFmpegFixupStretchedPP,
 153     FFmpegFixupTimestampPP,
 154     FFmpegMergerPP,
 155     FFmpegPostProcessor,
 156     MoveFilesAfterDownloadPP,
 157     _PLUGIN_CLASSES as plugin_postprocessors
 158 )
 159 from .update import detect_variant
 160 from .version import __version__, RELEASE_GIT_HEAD
 161
 162 if compat_os_name == 'nt':
 163     import ctypes
 164
 165
 166 class YoutubeDL(object):
 167     """YoutubeDL class.
 168
 169     YoutubeDL objects are the ones responsible for downloading the
 170     actual video file and writing it to disk if the user has requested
 171     it, among some other tasks. In most cases there should be one per
 172     program. As, given a video URL, the downloader doesn't know how to
 173     extract all the needed information, task that InfoExtractors do, it
 174     has to pass the URL to one of them.
 175
 176     For this, YoutubeDL objects have a method that allows
 177     InfoExtractors to be registered in a given order. When it is passed
 178     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 179     finds that reports being able to handle it. The InfoExtractor extracts
 180     all the information about the video or videos the URL refers to, and
 181     YoutubeDL processes the extracted information, possibly using a File
 182     Downloader to download the video.
 183
 184     YoutubeDL objects accept a lot of parameters. In order not to saturate
 185     the object constructor with arguments, it receives a dictionary of
 186     options instead. These options are available through the params
 187     attribute for the InfoExtractors to use. The YoutubeDL also
 188     registers itself as the downloader in charge for the InfoExtractors
 189     that are added to it, so this is a "mutual registration".
 190
 191     Available options:
 192
 193     username:          Username for authentication purposes.
 194     password:          Password for authentication purposes.
 195     videopassword:     Password for accessing a video.
 196     ap_mso:            Adobe Pass multiple-system operator identifier.
 197     ap_username:       Multiple-system operator account username.
 198     ap_password:       Multiple-system operator account password.
 199     usenetrc:          Use netrc for authentication instead.
 200     verbose:           Print additional info to stdout.
 201     quiet:             Do not print messages to stdout.
 202     no_warnings:       Do not print out anything for warnings.
 203     forceprint:        A dict with keys video/playlist mapped to
 204                        a list of templates to force print to stdout
 205                        For compatibility, a single list is also accepted
 206     forceurl:          Force printing final URL. (Deprecated)
 207     forcetitle:        Force printing title. (Deprecated)
 208     forceid:           Force printing ID. (Deprecated)
 209     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 210     forcedescription:  Force printing description. (Deprecated)
 211     forcefilename:     Force printing final filename. (Deprecated)
 212     forceduration:     Force printing duration. (Deprecated)
 213     forcejson:         Force printing info_dict as JSON.
 214     dump_single_json:  Force printing the info_dict of the whole playlist
 215                        (or video) as a single JSON line.
 216     force_write_download_archive: Force writing download archive regardless
 217                        of 'skip_download' or 'simulate'.
 218     simulate:          Do not download the video files. If unset (or None),
 219                        simulate only if listsubtitles, listformats or list_thumbnails is used
 220     format:            Video format code. see "FORMAT SELECTION" for more details.
 221                        You can also pass a function. The function takes 'ctx' as
 222                        argument and returns the formats to download.
 223                        See "build_format_selector" for an implementation
 224     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 225     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 226                        extracting metadata even if the video is not actually
 227                        available for download (experimental)
 228     format_sort:       A list of fields by which to sort the video formats.
 229                        See "Sorting Formats" for more details.
 230     format_sort_force: Force the given format_sort. see "Sorting Formats"
 231                        for more details.
 232     allow_multiple_video_streams:   Allow multiple video streams to be merged
 233                        into a single file
 234     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 235                        into a single file
 236     check_formats:     Whether to test if the formats are downloadable.
 237                        Can be True (check all), False (check none),
 238                        'selected' (check selected formats),
 239                        or None (check only if requested by extractor)
 240     paths:             Dictionary of output paths. The allowed keys are 'home',
 241                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 242     outtmpl:           Dictionary of templates for output names. Allowed keys
 243                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 244                        For compatibility with youtube-dl, a single string can also be used
 245     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 246     restrictfilenames: Do not allow "&" and spaces in file names
 247     trim_file_name:    Limit length of filename (extension excluded)
 248     windowsfilenames:  Force the filenames to be windows compatible
 249     ignoreerrors:      Do not stop on download/postprocessing errors.
 250                        Can be 'only_download' to ignore only download errors.
 251                        Default is 'only_download' for CLI, but False for API
 252     skip_playlist_after_errors: Number of allowed failures until the rest of
 253                        the playlist is skipped
 254     force_generic_extractor: Force downloader to use the generic extractor
 255     overwrites:        Overwrite all video and metadata files if True,
 256                        overwrite only non-video files if None
 257                        and don't overwrite any file if False
 258                        For compatibility with youtube-dl,
 259                        "nooverwrites" may also be used instead
 260     playliststart:     Playlist item to start at.
 261     playlistend:       Playlist item to end at.
 262     playlist_items:    Specific indices of playlist to download.
 263     playlistreverse:   Download playlist items in reverse order.
 264     playlistrandom:    Download playlist items in random order.
 265     matchtitle:        Download only matching titles.
 266     rejecttitle:       Reject downloads for matching titles.
 267     logger:            Log messages to a logging.Logger instance.
 268     logtostderr:       Log messages to stderr instead of stdout.
 269     consoletitle:      Display progress in console window's titlebar.
 270     writedescription:  Write the video description to a .description file
 271     writeinfojson:     Write the video metadata to a .info.json file
 272     clean_infojson:    Remove private fields from the infojson
 273     getcomments:       Extract video comments. This will not be written to disk
 274                        unless writeinfojson is also given
 275     writeannotations:  Write the video annotations to a .annotations.xml file
 276     writethumbnail:    Write the thumbnail image to a file
 277     allow_playlist_files: Whether to write playlists' description, infojson etc
 278                        also to disk when using the 'write*' options
 279     write_all_thumbnails:  Write all thumbnail formats to files
 280     writelink:         Write an internet shortcut file, depending on the
 281                        current platform (.url/.webloc/.desktop)
 282     writeurllink:      Write a Windows internet shortcut file (.url)
 283     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 284     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 285     writesubtitles:    Write the video subtitles to a file
 286     writeautomaticsub: Write the automatically generated subtitles to a file
 287     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 288                        Downloads all the subtitles of the video
 289                        (requires writesubtitles or writeautomaticsub)
 290     listsubtitles:     Lists all available subtitles for the video
 291     subtitlesformat:   The format code for subtitles
 292     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 293                        The list may contain "all" to refer to all the available
 294                        subtitles. The language can be prefixed with a "-" to
 295                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 296     keepvideo:         Keep the video file after post-processing
 297     daterange:         A DateRange object, download only if the upload_date is in the range.
 298     skip_download:     Skip the actual download of the video file
 299     cachedir:          Location of the cache files in the filesystem.
 300                        False to disable filesystem cache.
 301     noplaylist:        Download single video instead of a playlist if in doubt.
 302     age_limit:         An integer representing the user's age in years.
 303                        Unsuitable videos for the given age are skipped.
 304     min_views:         An integer representing the minimum view count the video
 305                        must have in order to not be skipped.
 306                        Videos without view count information are always
 307                        downloaded. None for no limit.
 308     max_views:         An integer representing the maximum view count.
 309                        Videos that are more popular than that are not
 310                        downloaded.
 311                        Videos without view count information are always
 312                        downloaded. None for no limit.
 313     download_archive:  File name of a file where all downloads are recorded.
 314                        Videos already present in the file are not downloaded
 315                        again.
 316     break_on_existing: Stop the download process after attempting to download a
 317                        file that is in the archive.
 318     break_on_reject:   Stop the download process when encountering a video that
 319                        has been filtered out.
 320     break_per_url:     Whether break_on_reject and break_on_existing
 321                        should act on each input URL as opposed to for the entire queue
 322     cookiefile:        File name where cookies should be read from and dumped to
 323     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 324                        name/path from where cookies are loaded, and the name of the
 325                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 326     nocheckcertificate:  Do not verify SSL certificates
 327     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 328                        At the moment, this is only supported by YouTube.
 329     proxy:             URL of the proxy server to use
 330     geo_verification_proxy:  URL of the proxy to use for IP address verification
 331                        on geo-restricted sites.
 332     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 333     bidi_workaround:   Work around buggy terminals without bidirectional text
 334                        support, using fribidi
 335     debug_printtraffic:Print out sent and received HTTP traffic
 336     include_ads:       Download ads as well (deprecated)
 337     default_search:    Prepend this string if an input url is not valid.
 338                        'auto' for elaborate guessing
 339     encoding:          Use this encoding instead of the system-specified.
 340     extract_flat:      Do not resolve URLs, return the immediate result.
 341                        Pass in 'in_playlist' to only show this behavior for
 342                        playlist items.
 343     wait_for_video:    If given, wait for scheduled streams to become available.
 344                        The value should be a tuple containing the range
 345                        (min_secs, max_secs) to wait between retries
 346     postprocessors:    A list of dictionaries, each with an entry
 347                        * key:  The name of the postprocessor. See
 348                                yt_dlp/postprocessor/__init__.py for a list.
 349                        * when: When to run the postprocessor. Can be one of
 350                                pre_process|before_dl|post_process|after_move.
 351                                Assumed to be 'post_process' if not given
 352     post_hooks:        Deprecated - Register a custom postprocessor instead
 353                        A list of functions that get called as the final step
 354                        for each video file, after all postprocessors have been
 355                        called. The filename will be passed as the only argument.
 356     progress_hooks:    A list of functions that get called on download
 357                        progress, with a dictionary with the entries
 358                        * status: One of "downloading", "error", or "finished".
 359                                  Check this first and ignore unknown values.
 360                        * info_dict: The extracted info_dict
 361
 362                        If status is one of "downloading", or "finished", the
 363                        following properties may also be present:
 364                        * filename: The final filename (always present)
 365                        * tmpfilename: The filename we're currently writing to
 366                        * downloaded_bytes: Bytes on disk
 367                        * total_bytes: Size of the whole file, None if unknown
 368                        * total_bytes_estimate: Guess of the eventual file size,
 369                                                None if unavailable.
 370                        * elapsed: The number of seconds since download started.
 371                        * eta: The estimated time in seconds, None if unknown
 372                        * speed: The download speed in bytes/second, None if
 373                                 unknown
 374                        * fragment_index: The counter of the currently
 375                                          downloaded video fragment.
 376                        * fragment_count: The number of fragments (= individual
 377                                          files that will be merged)
 378
 379                        Progress hooks are guaranteed to be called at least once
 380                        (with status "finished") if the download is successful.
 381     postprocessor_hooks:  A list of functions that get called on postprocessing
 382                        progress, with a dictionary with the entries
 383                        * status: One of "started", "processing", or "finished".
 384                                  Check this first and ignore unknown values.
 385                        * postprocessor: Name of the postprocessor
 386                        * info_dict: The extracted info_dict
 387
 388                        Postprocessor hooks are guaranteed to be called at least twice
 389                        (with status "started" and "finished") if the processing is successful.
 390     merge_output_format: Extension to use when merging formats.
 391     final_ext:         Expected final extension; used to detect when the file was
 392                        already downloaded and converted
 393     fixup:             Automatically correct known faults of the file.
 394                        One of:
 395                        - "never": do nothing
 396                        - "warn": only emit a warning
 397                        - "detect_or_warn": check whether we can do anything
 398                                            about it, warn otherwise (default)
 399     source_address:    Client-side IP address to bind to.
 400     call_home:         Boolean, true iff we are allowed to contact the
 401                        yt-dlp servers for debugging. (BROKEN)
 402     sleep_interval_requests: Number of seconds to sleep between requests
 403                        during extraction
 404     sleep_interval:    Number of seconds to sleep before each download when
 405                        used alone or a lower bound of a range for randomized
 406                        sleep before each download (minimum possible number
 407                        of seconds to sleep) when used along with
 408                        max_sleep_interval.
 409     max_sleep_interval:Upper bound of a range for randomized sleep before each
 410                        download (maximum possible number of seconds to sleep).
 411                        Must only be used along with sleep_interval.
 412                        Actual sleep time will be a random float from range
 413                        [sleep_interval; max_sleep_interval].
 414     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 415     listformats:       Print an overview of available video formats and exit.
 416     list_thumbnails:   Print a table of all thumbnails and exit.
 417     match_filter:      A function that gets called with the info_dict of
 418                        every video.
 419                        If it returns a message, the video is ignored.
 420                        If it returns None, the video is downloaded.
 421                        match_filter_func in utils.py is one example for this.
 422     no_color:          Do not emit color codes in output.
 423     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 424                        HTTP header
 425     geo_bypass_country:
 426                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 427                        explicit geographic restriction bypassing via faking
 428                        X-Forwarded-For HTTP header
 429     geo_bypass_ip_block:
 430                        IP range in CIDR notation that will be used similarly to
 431                        geo_bypass_country
 432
 433     The following options determine which downloader is picked:
 434     external_downloader: A dictionary of protocol keys and the executable of the
 435                        external downloader to use for it. The allowed protocols
 436                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 437                        Set the value to 'native' to use the native downloader
 438     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 439                        or {'m3u8': 'ffmpeg'} instead.
 440                        Use the native HLS downloader instead of ffmpeg/avconv
 441                        if True, otherwise use ffmpeg/avconv if False, otherwise
 442                        use downloader suggested by extractor if None.
 443     compat_opts:       Compatibility options. See "Differences in default behavior".
 444                        The following options do not work when used through the API:
 445                        filename, abort-on-error, multistreams, no-live-chat, format-sort
 446                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 447                        Refer to __init__.py for their implementation
 448     progress_template: Dictionary of templates for progress outputs.
 449                        Allowed keys are 'download', 'postprocess',
 450                        'download-title' (console title) and 'postprocess-title'.
 451                        The template is mapped on a dictionary with keys 'progress' and 'info'
 452
 453     The following parameters are not used by YoutubeDL itself, they are used by
 454     the downloader (see yt_dlp/downloader/common.py):
 455     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 456     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 457     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 458     external_downloader_args, concurrent_fragment_downloads.
 459
 460     The following options are used by the post processors:
 461     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 462                        otherwise prefer ffmpeg. (avconv support is deprecated)
 463     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 464                        to the binary or its containing directory.
 465     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 466                        and a list of additional command-line arguments for the
 467                        postprocessor/executable. The dict can also have "PP+EXE" keys
 468                        which are used when the given exe is used by the given PP.
 469                        Use 'default' as the name for arguments to be passed to all PP
 470                        For compatibility with youtube-dl, a single list of args
 471                        can also be used
 472
 473     The following options are used by the extractors:
 474     extractor_retries: Number of times to retry for known errors
 475     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 476     hls_split_discontinuity: Split HLS playlists to different formats at
 477                        discontinuities such as ad breaks (default: False)
 478     extractor_args:    A dictionary of arguments to be passed to the extractors.
 479                        See "EXTRACTOR ARGUMENTS" for details.
 480                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 481     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 482                        If True (default), DASH manifests and related
 483                        data will be downloaded and processed by extractor.
 484                        You can reduce network I/O by disabling it if you don't
 485                        care about DASH. (only for youtube)
 486     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 487                        If True (default), HLS manifests and related
 488                        data will be downloaded and processed by extractor.
 489                        You can reduce network I/O by disabling it if you don't
 490                        care about HLS. (only for youtube)
 491     """
 492
 493     _NUMERIC_FIELDS = set((
 494         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 495         'timestamp', 'release_timestamp',
 496         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 497         'average_rating', 'comment_count', 'age_limit',
 498         'start_time', 'end_time',
 499         'chapter_number', 'season_number', 'episode_number',
 500         'track_number', 'disc_number', 'release_year',
 501     ))
 502
 503     _format_selection_exts = {
 504         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 505         'video': {'mp4', 'flv', 'webm', '3gp'},
 506         'storyboards': {'mhtml'},
 507     }
 508
 509     params = None
 510     _ies = {}
 511     _pps = {k: [] for k in POSTPROCESS_WHEN}
 512     _printed_messages = set()
 513     _first_webpage_request = True
 514     _download_retcode = None
 515     _num_downloads = None
 516     _playlist_level = 0
 517     _playlist_urls = set()
 518     _screen_file = None
 519
 520     def __init__(self, params=None, auto_init=True):
 521         """Create a YoutubeDL object with the given options.
 522         @param auto_init    Whether to load the default extractors and print header (if verbose).
 523                             Set to 'no_verbose_header' to not print the header
 524         """
 525         if params is None:
 526             params = {}
 527         self._ies = {}
 528         self._ies_instances = {}
 529         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 530         self._printed_messages = set()
 531         self._first_webpage_request = True
 532         self._post_hooks = []
 533         self._progress_hooks = []
 534         self._postprocessor_hooks = []
 535         self._download_retcode = 0
 536         self._num_downloads = 0
 537         self._num_videos = 0
 538         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 539         self._err_file = sys.stderr
 540         self.params = params
 541         self.cache = Cache(self)
 542
 543         windows_enable_vt_mode()
 544         self._allow_colors = {
 545             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 546             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 547         }
 548
 549         if sys.version_info < (3, 6):
 550             self.report_warning(
 551                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 552
 553         if self.params.get('allow_unplayable_formats'):
 554             self.report_warning(
 555                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 556                 'This is a developer option intended for debugging. \n'
 557                 '         If you experience any issues while using this option, '
 558                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 559
 560         def check_deprecated(param, option, suggestion):
                 # Report use of a deprecated parameter.
                 #   param:      key looked up in self.params
                 #   option:     option name shown to the user in the warning
                 #   suggestion: replacement shown to the user in the warning
                 # Returns True (after emitting a warning through
                 # self.report_warning) when self.params holds a non-None value
                 # for `param`; returns False otherwise.
 561             if self.params.get(param) is not None:
 562                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 563                 return True
 564             return False
 565
 566         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 567             if self.params.get('geo_verification_proxy') is None:
 568                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 569
 570         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 571         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 572         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 573
 574         for msg in self.params.get('_warnings', []):
 575             self.report_warning(msg)
 576         for msg in self.params.get('_deprecation_warnings', []):
 577             self.deprecation_warning(msg)
 578
 579         if 'list-formats' in self.params.get('compat_opts', []):
 580             self.params['listformats_table'] = False
 581
 582         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 583             # nooverwrites was unnecessarily changed to overwrites
 584             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 585             # This ensures compatibility with both keys
 586             self.params['overwrites'] = not self.params['nooverwrites']
 587         elif self.params.get('overwrites') is None:
 588             self.params.pop('overwrites', None)
 589         else:
 590             self.params['nooverwrites'] = not self.params['overwrites']
 591
 592         # Compatibility with older syntax
 593         params.setdefault('forceprint', {})
 594         if not isinstance(params['forceprint'], dict):
 595             params['forceprint'] = {'video': params['forceprint']}
 596
 597         if params.get('bidi_workaround', False):
 598             try:
 599                 import pty
 600                 master, slave = pty.openpty()
 601                 width = compat_get_terminal_size().columns
 602                 if width is None:
 603                     width_args = []
 604                 else:
 605                     width_args = ['-w', str(width)]
 606                 sp_kwargs = dict(
 607                     stdin=subprocess.PIPE,
 608                     stdout=slave,
 609                     stderr=self._err_file)
 610                 try:
 611                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 612                 except OSError:
 613                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 614                 self._output_channel = os.fdopen(master, 'rb')
 615             except OSError as ose:
 616                 if ose.errno == errno.ENOENT:
 617                     self.report_warning(
 618                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 619                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 620                 else:
 621                     raise
 622
 623         if (sys.platform != 'win32'
 624                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 625                 and not params.get('restrictfilenames', False)):
 626             # Unicode filesystem API will throw errors (#1474, #13027)
 627             self.report_warning(
 628                 'Assuming --restrict-filenames since file system encoding '
 629                 'cannot encode all characters. '
 630                 'Set the LC_ALL environment variable to fix this.')
 631             self.params['restrictfilenames'] = True
 632
 633         self.outtmpl_dict = self.parse_outtmpl()
 634
 635         # Creating format selector here allows us to catch syntax errors before the extraction
 636         self.format_selector = (
 637             self.params.get('format') if self.params.get('format') in (None, '-')
 638             else self.params['format'] if callable(self.params['format'])
 639             else self.build_format_selector(self.params['format']))
 640
 641         self._setup_opener()
 642
 643         if auto_init:
 644             if auto_init != 'no_verbose_header':
 645                 self.print_debug_header()
 646             self.add_default_info_extractors()
 647
 648         hooks = {
 649             'post_hooks': self.add_post_hook,
 650             'progress_hooks': self.add_progress_hook,
 651             'postprocessor_hooks': self.add_postprocessor_hook,
 652         }
 653         for opt, fn in hooks.items():
 654             for ph in self.params.get(opt, []):
 655                 fn(ph)
 656
 657         for pp_def_raw in self.params.get('postprocessors', []):
 658             pp_def = dict(pp_def_raw)
 659             when = pp_def.pop('when', 'post_process')
 660             self.add_post_processor(
 661                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 662                 when=when)
 663
 664         register_socks_protocols()
 665
 666         def preload_download_archive(fn):
 667             """Preload the archive, if any is specified"""
 668             if fn is None:
 669                 return False
 670             self.write_debug(f'Loading archive file {fn!r}')
 671             try:
 672                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 673                     for line in archive_file:
 674                         self.archive.add(line.strip())
 675             except IOError as ioe:
 676                 if ioe.errno != errno.ENOENT:
 677                     raise
 678                 return False
 679             return True
 680
 681         self.archive = set()
 682         preload_download_archive(self.params.get('download_archive'))
 683
 684     def warn_if_short_id(self, argv):
 685         # short YouTube ID starting with dash?
 686         idxs = [
 687             i for i, a in enumerate(argv)
 688             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 689         if idxs:
 690             correct_argv = (
 691                 ['yt-dlp']
 692                 + [a for i, a in enumerate(argv) if i not in idxs]
 693                 + ['--'] + [argv[i] for i in idxs]
 694             )
 695             self.report_warning(
 696                 'Long argument string detected. '
 697                 'Use -- to separate parameters and URLs, like this:\n%s' %
 698                 args_to_str(correct_argv))
 699
 700     def add_info_extractor(self, ie):
 701         """Add an InfoExtractor object to the end of the list."""
 702         ie_key = ie.ie_key()
 703         self._ies[ie_key] = ie
 704         if not isinstance(ie, type):
 705             self._ies_instances[ie_key] = ie
 706             ie.set_downloader(self)
 707
 708     def _get_info_extractor_class(self, ie_key):
 709         ie = self._ies.get(ie_key)
 710         if ie is None:
 711             ie = get_info_extractor(ie_key)
 712             self.add_info_extractor(ie)
 713         return ie
 714
 715     def get_info_extractor(self, ie_key):
 716         """
 717         Get an instance of an IE with name ie_key, it will try to get one from
 718         the _ies list, if there's no instance it will create a new one and add
 719         it to the extractor list.
 720         """
 721         ie = self._ies_instances.get(ie_key)
 722         if ie is None:
 723             ie = get_info_extractor(ie_key)()
 724             self.add_info_extractor(ie)
 725         return ie
 726
 727     def add_default_info_extractors(self):
 728         """
 729         Add the InfoExtractors returned by gen_extractors to the end of the list
 730         """
 731         for ie in gen_extractor_classes():
 732             self.add_info_extractor(ie)
 733
 734     def add_post_processor(self, pp, when='post_process'):
 735         """Add a PostProcessor object to the end of the chain."""
 736         self._pps[when].append(pp)
 737         pp.set_downloader(self)
 738
 739     def add_post_hook(self, ph):
 740         """Add the post hook"""
 741         self._post_hooks.append(ph)
 742
 743     def add_progress_hook(self, ph):
 744         """Add the download progress hook"""
 745         self._progress_hooks.append(ph)
 746
 747     def add_postprocessor_hook(self, ph):
 748         """Add the postprocessing progress hook"""
 749         self._postprocessor_hooks.append(ph)
 750         for pps in self._pps.values():
 751             for pp in pps:
 752                 pp.add_progress_hook(ph)
 753
 754     def _bidi_workaround(self, message):
 755         if not hasattr(self, '_output_channel'):
 756             return message
 757
 758         assert hasattr(self, '_output_process')
 759         assert isinstance(message, compat_str)
 760         line_count = message.count('\n') + 1
 761         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 762         self._output_process.stdin.flush()
 763         res = ''.join(self._output_channel.readline().decode('utf-8')
 764                       for _ in range(line_count))
 765         return res[:-len('\n')]
 766
 767     def _write_string(self, message, out=None, only_once=False):
 768         if only_once:
 769             if message in self._printed_messages:
 770                 return
 771             self._printed_messages.add(message)
 772         write_string(message, out=out, encoding=self.params.get('encoding'))
 773
 774     def to_stdout(self, message, skip_eol=False, quiet=False):
 775         """Print message to stdout"""
 776         if self.params.get('logger'):
 777             self.params['logger'].debug(message)
 778         elif not quiet or self.params.get('verbose'):
 779             self._write_string(
 780                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 781                 self._err_file if quiet else self._screen_file)
 782
 783     def to_stderr(self, message, only_once=False):
 784         """Print message to stderr"""
 785         assert isinstance(message, compat_str)
 786         if self.params.get('logger'):
 787             self.params['logger'].error(message)
 788         else:
 789             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 790
 791     def to_console_title(self, message):
 792         if not self.params.get('consoletitle', False):
 793             return
 794         message = remove_terminal_sequences(message)
 795         if compat_os_name == 'nt':
 796             if ctypes.windll.kernel32.GetConsoleWindow():
 797                 # c_wchar_p() might not be necessary if `message` is
 798                 # already of type unicode()
 799                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 800         elif 'TERM' in os.environ:
 801             self._write_string('\033]0;%s\007' % message, self._screen_file)
 802
 803     def save_console_title(self):
 804         if not self.params.get('consoletitle', False):
 805             return
 806         if self.params.get('simulate'):
 807             return
 808         if compat_os_name != 'nt' and 'TERM' in os.environ:
 809             # Save the title on stack
 810             self._write_string('\033[22;0t', self._screen_file)
 811
 812     def restore_console_title(self):
 813         if not self.params.get('consoletitle', False):
 814             return
 815         if self.params.get('simulate'):
 816             return
 817         if compat_os_name != 'nt' and 'TERM' in os.environ:
 818             # Restore the title from stack
 819             self._write_string('\033[23;0t', self._screen_file)
 820
 821     def __enter__(self):
 822         self.save_console_title()
 823         return self
 824
 825     def __exit__(self, *args):
 826         self.restore_console_title()
 827
 828         if self.params.get('cookiefile') is not None:
 829             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 830
 831     def trouble(self, message=None, tb=None, is_error=True):
 832         """Determine action to take when a download problem appears.
 833
 834         Depending on if the downloader has been configured to ignore
 835         download errors or not, this method may throw an exception or
 836         not when errors are found, after printing the message.
 837
 838         @param tb          If given, is additional traceback information
 839         @param is_error    Whether to raise error according to ignorerrors
 840         """
 841         if message is not None:
 842             self.to_stderr(message)
 843         if self.params.get('verbose'):
 844             if tb is None:
 845                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 846                     tb = ''
 847                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 848                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 849                     tb += encode_compat_str(traceback.format_exc())
 850                 else:
 851                     tb_data = traceback.format_list(traceback.extract_stack())
 852                     tb = ''.join(tb_data)
 853             if tb:
 854                 self.to_stderr(tb)
 855         if not is_error:
 856             return
 857         if not self.params.get('ignoreerrors'):
 858             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 859                 exc_info = sys.exc_info()[1].exc_info
 860             else:
 861                 exc_info = sys.exc_info()
 862             raise DownloadError(message, exc_info)
 863         self._download_retcode = 1
 864
 865     def to_screen(self, message, skip_eol=False):
 866         """Print message to stdout if not in quiet mode"""
 867         self.to_stdout(
 868             message, skip_eol, quiet=self.params.get('quiet', False))
 869
    class Styles(Enum):
        """Named text styles accepted by _format_text().

        _format_text() replaces a member by its value before handing it to
        format_text().
        """
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        # Same value as HEADERS, so Enum makes WARNING an alias of HEADERS
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 878
 879     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 880         if test_encoding:
 881             original_text = text
 882             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 883             text = text.encode(encoding, 'ignore').decode(encoding)
 884             if fallback is not None and text != original_text:
 885                 text = fallback
 886         if isinstance(f, self.Styles):
 887             f = f.value
 888         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 889
 890     def _format_screen(self, *args, **kwargs):
 891         return self._format_text(
 892             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 893
 894     def _format_err(self, *args, **kwargs):
 895         return self._format_text(
 896             self._err_file, self._allow_colors['err'], *args, **kwargs)
 897
 898     def report_warning(self, message, only_once=False):
 899         '''
 900         Print the message to stderr, it will be prefixed with 'WARNING:'
 901         If stderr is a tty file the 'WARNING:' will be colored
 902         '''
 903         if self.params.get('logger') is not None:
 904             self.params['logger'].warning(message)
 905         else:
 906             if self.params.get('no_warnings'):
 907                 return
 908             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 909
 910     def deprecation_warning(self, message):
 911         if self.params.get('logger') is not None:
 912             self.params['logger'].warning('DeprecationWarning: {message}')
 913         else:
 914             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 915
 916     def report_error(self, message, *args, **kwargs):
 917         '''
 918         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 919         in red if stderr is a tty file.
 920         '''
 921         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 922
 923     def write_debug(self, message, only_once=False):
 924         '''Log debug message or Print message to stderr'''
 925         if not self.params.get('verbose', False):
 926             return
 927         message = '[debug] %s' % message
 928         if self.params.get('logger'):
 929             self.params['logger'].debug(message)
 930         else:
 931             self.to_stderr(message, only_once)
 932
 933     def report_file_already_downloaded(self, file_name):
 934         """Report file has already been fully downloaded."""
 935         try:
 936             self.to_screen('[download] %s has already been downloaded' % file_name)
 937         except UnicodeEncodeError:
 938             self.to_screen('[download] The file has already been downloaded')
 939
 940     def report_file_delete(self, file_name):
 941         """Report that existing file will be deleted."""
 942         try:
 943             self.to_screen('Deleting existing file %s' % file_name)
 944         except UnicodeEncodeError:
 945             self.to_screen('Deleting existing file')
 946
 947     def raise_no_formats(self, info, forced=False):
 948         has_drm = info.get('__has_drm')
 949         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 950         expected = self.params.get('ignore_no_formats_error')
 951         if forced or not expected:
 952             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 953                                  expected=has_drm or expected)
 954         else:
 955             self.report_warning(msg)
 956
 957     def parse_outtmpl(self):
 958         outtmpl_dict = self.params.get('outtmpl', {})
 959         if not isinstance(outtmpl_dict, dict):
 960             outtmpl_dict = {'default': outtmpl_dict}
 961         # Remove spaces in the default template
 962         if self.params.get('restrictfilenames'):
 963             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 964         else:
 965             sanitize = lambda x: x
 966         outtmpl_dict.update({
 967             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 968             if outtmpl_dict.get(k) is None})
 969         for key, val in outtmpl_dict.items():
 970             if isinstance(val, bytes):
 971                 self.report_warning(
 972                     'Parameter outtmpl is bytes, but should be a unicode string. '
 973                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 974         return outtmpl_dict
 975
 976     def get_output_path(self, dir_type='', filename=None):
 977         paths = self.params.get('paths', {})
 978         assert isinstance(paths, dict)
 979         path = os.path.join(
 980             expand_path(paths.get('home', '').strip()),
 981             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 982             filename or '')
 983
 984         # Temporary fix for #4787
 985         # 'Treat' all problem characters by passing filename through preferredencoding
 986         # to workaround encoding issues with subprocess on python2 @ Windows
 987         if sys.version_info < (3, 0) and sys.platform == 'win32':
 988             path = encodeFilename(path, True).decode(preferredencoding())
 989         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 990
 991     @staticmethod
 992     def _outtmpl_expandpath(outtmpl):
 993         # expand_path translates '%%' into '%' and '$$' into '$'
 994         # correspondingly that is not what we want since we need to keep
 995         # '%%' intact for template dict substitution step. Working around
 996         # with boundary-alike separator hack.
 997         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 998         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 999
1000         # outtmpl should be expand_path'ed before template dict substitution
1001         # because meta fields may contain env variables we don't want to
1002         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1003         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1004         return expand_path(outtmpl).replace(sep, '')
1005
1006     @staticmethod
1007     def escape_outtmpl(outtmpl):
1008         ''' Escape any remaining strings like %s, %abc% etc. '''
1009         return re.sub(
1010             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1011             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1012             outtmpl)
1013
1014     @classmethod
1015     def validate_outtmpl(cls, outtmpl):
1016         ''' @return None or Exception object '''
1017         outtmpl = re.sub(
1018             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1019             lambda mobj: f'{mobj.group(0)[:-1]}s',
1020             cls._outtmpl_expandpath(outtmpl))
1021         try:
1022             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1023             return None
1024         except ValueError as err:
1025             return err
1026
1027     @staticmethod
1028     def _copy_infodict(info_dict):
1029         info_dict = dict(info_dict)
1030         for key in ('__original_infodict', '__postprocessors'):
1031             info_dict.pop(key, None)
1032         return info_dict
1033
1034     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1035         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1036         @param sanitize    Whether to sanitize the output as a filename.
1037                            For backward compatibility, a function can also be passed
1038         """
1039
1040         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1041
1042         info_dict = self._copy_infodict(info_dict)
1043         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1044             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1045             if info_dict.get('duration', None) is not None
1046             else None)
1047         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
1048         info_dict['video_autonumber'] = self._num_videos
1049         if info_dict.get('resolution') is None:
1050             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1051
1052         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1053         # of %(field)s to %(field)0Nd for backward compatibility
1054         field_size_compat_map = {
1055             'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1056             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1057             'autonumber': self.params.get('autonumber_size') or 5,
1058         }
1059
1060         TMPL_DICT = {}
1061         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
1062         MATH_FUNCTIONS = {
1063             '+': float.__add__,
1064             '-': float.__sub__,
1065         }
1066         # Field is of the form key1.key2...
1067         # where keys (except first) can be string, int or slice
1068         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
1069         MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
1070         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1071         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1072             (?P<negate>-)?
1073             (?P<fields>{field})
1074             (?P<maths>(?:{math_op}{math_field})*)
1075             (?:>(?P<strf_format>.+?))?
1076             (?P<alternate>(?<!\\),[^|&)]+)?
1077             (?:&(?P<replacement>.*?))?
1078             (?:\|(?P<default>.*?))?
1079             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
1080
1081         def _traverse_infodict(k):
1082             k = k.split('.')
1083             if k[0] == '':
1084                 k.pop(0)
1085             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
1086
1087         def get_value(mdict):
1088             # Object traversal
1089             value = _traverse_infodict(mdict['fields'])
1090             # Negative
1091             if mdict['negate']:
1092                 value = float_or_none(value)
1093                 if value is not None:
1094                     value *= -1
1095             # Do maths
1096             offset_key = mdict['maths']
1097             if offset_key:
1098                 value = float_or_none(value)
1099                 operator = None
1100                 while offset_key:
1101                     item = re.match(
1102                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1103                         offset_key).group(0)
1104                     offset_key = offset_key[len(item):]
1105                     if operator is None:
1106                         operator = MATH_FUNCTIONS[item]
1107                         continue
1108                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1109                     offset = float_or_none(item)
1110                     if offset is None:
1111                         offset = float_or_none(_traverse_infodict(item))
1112                     try:
1113                         value = operator(value, multiplier * offset)
1114                     except (TypeError, ZeroDivisionError):
1115                         return None
1116                     operator = None
1117             # Datetime formatting
1118             if mdict['strf_format']:
1119                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1120
1121             return value
1122
1123         na = self.params.get('outtmpl_na_placeholder', 'NA')
1124
1125         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1126             return sanitize_filename(str(value), restricted=restricted,
1127                                      is_id=re.search(r'(^|[_.])id(\.|$)', key))
1128
1129         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1130         sanitize = bool(sanitize)
1131
1132         def _dumpjson_default(obj):
1133             if isinstance(obj, (set, LazyList)):
1134                 return list(obj)
1135             return repr(obj)
1136
1137         def create_key(outer_mobj):
1138             if not outer_mobj.group('has_key'):
1139                 return outer_mobj.group(0)
1140             key = outer_mobj.group('key')
1141             mobj = re.match(INTERNAL_FORMAT_RE, key)
1142             initial_field = mobj.group('fields') if mobj else ''
1143             value, replacement, default = None, None, na
1144             while mobj:
1145                 mobj = mobj.groupdict()
1146                 default = mobj['default'] if mobj['default'] is not None else default
1147                 value = get_value(mobj)
1148                 replacement = mobj['replacement']
1149                 if value is None and mobj['alternate']:
1150                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1151                 else:
1152                     break
1153
1154             fmt = outer_mobj.group('format')
1155             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1156                 fmt = '0{:d}d'.format(field_size_compat_map[key])
1157
1158             value = default if value is None else value if replacement is None else replacement
1159
1160             flags = outer_mobj.group('conversion') or ''
1161             str_fmt = f'{fmt[:-1]}s'
1162             if fmt[-1] == 'l':  # list
1163                 delim = '\n' if '#' in flags else ', '
1164                 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1165             elif fmt[-1] == 'j':  # json
1166                 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
1167             elif fmt[-1] == 'q':  # quoted
1168                 value = map(str, variadic(value) if '#' in flags else [value])
1169                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1170             elif fmt[-1] == 'B':  # bytes
1171                 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1172                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1173             elif fmt[-1] == 'U':  # unicode normalized
1174                 value, fmt = unicodedata.normalize(
1175                     # "+" = compatibility equivalence, "#" = NFD
1176                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1177                     value), str_fmt
1178             elif fmt[-1] == 'D':  # decimal suffix
1179                 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1180                 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1181                                               factor=1024 if '#' in flags else 1000)
1182             elif fmt[-1] == 'S':  # filename sanitization
1183                 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
1184             elif fmt[-1] == 'c':
1185                 if value:
1186                     value = str(value)[0]
1187                 else:
1188                     fmt = str_fmt
1189             elif fmt[-1] not in 'rs':  # numeric
1190                 value = float_or_none(value)
1191                 if value is None:
1192                     value, fmt = default, 's'
1193
1194             if sanitize:
1195                 if fmt[-1] == 'r':
1196                     # If value is an object, sanitize might convert it to a string
1197                     # So we convert it to repr first
1198                     value, fmt = repr(value), str_fmt
1199                 if fmt[-1] in 'csr':
1200                     value = sanitizer(initial_field, value)
1201
1202             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1203             TMPL_DICT[key] = value
1204             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1205
1206         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1207
1208     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1209         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1210         return self.escape_outtmpl(outtmpl) % info_dict
1211
1212     def _prepare_filename(self, info_dict, tmpl_type='default'):
1213         try:
1214             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1215             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1216
1217             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1218             if filename and force_ext is not None:
1219                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1220
1221             # https://github.com/blackjack4494/youtube-dlc/issues/85
1222             trim_file_name = self.params.get('trim_file_name', False)
1223             if trim_file_name:
1224                 no_ext, *ext = filename.rsplit('.', 2)
1225                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1226
1227             return filename
1228         except ValueError as err:
1229             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1230             return None
1231
1232     def prepare_filename(self, info_dict, dir_type='', warn=False):
1233         """Generate the output filename."""
1234
1235         filename = self._prepare_filename(info_dict, dir_type or 'default')
1236         if not filename and dir_type not in ('', 'temp'):
1237             return ''
1238
1239         if warn:
1240             if not self.params.get('paths'):
1241                 pass
1242             elif filename == '-':
1243                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1244             elif os.path.isabs(filename):
1245                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1246         if filename == '-' or not filename:
1247             return filename
1248
1249         return self.get_output_path(dir_type, filename)
1250
1251     def _match_entry(self, info_dict, incomplete=False, silent=False):
1252         """ Returns None if the file should be downloaded """
1253
1254         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1255
1256         def check_filter():
1257             if 'title' in info_dict:
1258                 # This can happen when we're just evaluating the playlist
1259                 title = info_dict['title']
1260                 matchtitle = self.params.get('matchtitle', False)
1261                 if matchtitle:
1262                     if not re.search(matchtitle, title, re.IGNORECASE):
1263                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1264                 rejecttitle = self.params.get('rejecttitle', False)
1265                 if rejecttitle:
1266                     if re.search(rejecttitle, title, re.IGNORECASE):
1267                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1268             date = info_dict.get('upload_date')
1269             if date is not None:
1270                 dateRange = self.params.get('daterange', DateRange())
1271                 if date not in dateRange:
1272                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1273             view_count = info_dict.get('view_count')
1274             if view_count is not None:
1275                 min_views = self.params.get('min_views')
1276                 if min_views is not None and view_count < min_views:
1277                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1278                 max_views = self.params.get('max_views')
1279                 if max_views is not None and view_count > max_views:
1280                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1281             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1282                 return 'Skipping "%s" because it is age restricted' % video_title
1283
1284             match_filter = self.params.get('match_filter')
1285             if match_filter is not None:
1286                 try:
1287                     ret = match_filter(info_dict, incomplete=incomplete)
1288                 except TypeError:
1289                     # For backward compatibility
1290                     ret = None if incomplete else match_filter(info_dict)
1291                 if ret is not None:
1292                     return ret
1293             return None
1294
1295         if self.in_download_archive(info_dict):
1296             reason = '%s has already been recorded in the archive' % video_title
1297             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1298         else:
1299             reason = check_filter()
1300             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1301         if reason is not None:
1302             if not silent:
1303                 self.to_screen('[download] ' + reason)
1304             if self.params.get(break_opt, False):
1305                 raise break_err()
1306         return reason
1307
1308     @staticmethod
1309     def add_extra_info(info_dict, extra_info):
1310         '''Set the keys from extra_info in info dict if they are missing'''
1311         for key, value in extra_info.items():
1312             info_dict.setdefault(key, value)
1313
1314     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1315                      process=True, force_generic_extractor=False):
1316         """
1317         Return a list with a dictionary for each video extracted.
1318
1319         Arguments:
1320         url -- URL to extract
1321
1322         Keyword arguments:
1323         download -- whether to download videos during extraction
1324         ie_key -- extractor key hint
1325         extra_info -- dictionary containing the extra values to add to each result
1326         process -- whether to resolve all unresolved references (URLs, playlist items),
1327             must be True for download to work.
1328         force_generic_extractor -- force using the generic extractor
1329         """
1330
1331         if extra_info is None:
1332             extra_info = {}
1333
1334         if not ie_key and force_generic_extractor:
1335             ie_key = 'Generic'
1336
1337         if ie_key:
1338             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1339         else:
1340             ies = self._ies
1341
1342         for ie_key, ie in ies.items():
1343             if not ie.suitable(url):
1344                 continue
1345
1346             if not ie.working():
1347                 self.report_warning('The program functionality for this site has been marked as broken, '
1348                                     'and will probably not work.')
1349
1350             temp_id = ie.get_temp_id(url)
1351             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1352                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1353                 if self.params.get('break_on_existing', False):
1354                     raise ExistingVideoReached()
1355                 break
1356             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1357         else:
1358             self.report_error('no suitable InfoExtractor for URL %s' % url)
1359
1360     def __handle_extraction_exceptions(func):
1361         @functools.wraps(func)
1362         def wrapper(self, *args, **kwargs):
1363             while True:
1364                 try:
1365                     return func(self, *args, **kwargs)
1366                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1367                     raise
1368                 except ReExtractInfo as e:
1369                     if e.expected:
1370                         self.to_screen(f'{e}; Re-extracting data')
1371                     else:
1372                         self.to_stderr('\r')
1373                         self.report_warning(f'{e}; Re-extracting data')
1374                     continue
1375                 except GeoRestrictedError as e:
1376                     msg = e.msg
1377                     if e.countries:
1378                         msg += '\nThis video is available in %s.' % ', '.join(
1379                             map(ISO3166Utils.short2full, e.countries))
1380                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1381                     self.report_error(msg)
1382                 except ExtractorError as e:  # An error we somewhat expected
1383                     self.report_error(str(e), e.format_traceback())
1384                 except Exception as e:
1385                     if self.params.get('ignoreerrors'):
1386                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1387                     else:
1388                         raise
1389                 break
1390         return wrapper
1391
1392     def _wait_for_video(self, ie_result):
1393         if (not self.params.get('wait_for_video')
1394                 or ie_result.get('_type', 'video') != 'video'
1395                 or ie_result.get('formats') or ie_result.get('url')):
1396             return
1397
1398         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1399         last_msg = ''
1400
1401         def progress(msg):
1402             nonlocal last_msg
1403             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1404             last_msg = msg
1405
1406         min_wait, max_wait = self.params.get('wait_for_video')
1407         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1408         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1409             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1410             self.report_warning('Release time of video is not known')
1411         elif (diff or 0) <= 0:
1412             self.report_warning('Video should already be available according to extracted info')
1413         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1414         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1415
1416         wait_till = time.time() + diff
1417         try:
1418             while True:
1419                 diff = wait_till - time.time()
1420                 if diff <= 0:
1421                     progress('')
1422                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1423                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1424                 time.sleep(1)
1425         except KeyboardInterrupt:
1426             progress('')
1427             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1428         except BaseException as e:
1429             if not isinstance(e, ReExtractInfo):
1430                 self.to_screen('')
1431             raise
1432
1433     @__handle_extraction_exceptions
1434     def __extract_info(self, url, ie, download, extra_info, process):
1435         ie_result = ie.extract(url)
1436         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1437             return
1438         if isinstance(ie_result, list):
1439             # Backwards compatibility: old IE result format
1440             ie_result = {
1441                 '_type': 'compat_list',
1442                 'entries': ie_result,
1443             }
1444         if extra_info.get('original_url'):
1445             ie_result.setdefault('original_url', extra_info['original_url'])
1446         self.add_default_extra_info(ie_result, ie, url)
1447         if process:
1448             self._wait_for_video(ie_result)
1449             return self.process_ie_result(ie_result, download, extra_info)
1450         else:
1451             return ie_result
1452
1453     def add_default_extra_info(self, ie_result, ie, url):
1454         if url is not None:
1455             self.add_extra_info(ie_result, {
1456                 'webpage_url': url,
1457                 'original_url': url,
1458                 'webpage_url_basename': url_basename(url),
1459                 'webpage_url_domain': get_domain(url),
1460             })
1461         if ie is not None:
1462             self.add_extra_info(ie_result, {
1463                 'extractor': ie.IE_NAME,
1464                 'extractor_key': ie.ie_key(),
1465             })
1466
1467     def process_ie_result(self, ie_result, download=True, extra_info=None):
1468         """
1469         Take the result of the ie(may be modified) and resolve all unresolved
1470         references (URLs, playlist items).
1471
1472         It will also download the videos if 'download'.
1473         Returns the resolved ie_result.
1474         """
1475         if extra_info is None:
1476             extra_info = {}
1477         result_type = ie_result.get('_type', 'video')
1478
1479         if result_type in ('url', 'url_transparent'):
1480             ie_result['url'] = sanitize_url(ie_result['url'])
1481             if ie_result.get('original_url'):
1482                 extra_info.setdefault('original_url', ie_result['original_url'])
1483
1484             extract_flat = self.params.get('extract_flat', False)
1485             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1486                     or extract_flat is True):
1487                 info_copy = ie_result.copy()
1488                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1489                 if ie and not ie_result.get('id'):
1490                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1491                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1492                 self.add_extra_info(info_copy, extra_info)
1493                 info_copy, _ = self.pre_process(info_copy)
1494                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1495                 if self.params.get('force_write_download_archive', False):
1496                     self.record_download_archive(info_copy)
1497                 return ie_result
1498
1499         if result_type == 'video':
1500             self.add_extra_info(ie_result, extra_info)
1501             ie_result = self.process_video_result(ie_result, download=download)
1502             additional_urls = (ie_result or {}).get('additional_urls')
1503             if additional_urls:
1504                 # TODO: Improve MetadataParserPP to allow setting a list
1505                 if isinstance(additional_urls, compat_str):
1506                     additional_urls = [additional_urls]
1507                 self.to_screen(
1508                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1509                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1510                 ie_result['additional_entries'] = [
1511                     self.extract_info(
1512                         url, download, extra_info=extra_info,
1513                         force_generic_extractor=self.params.get('force_generic_extractor'))
1514                     for url in additional_urls
1515                 ]
1516             return ie_result
1517         elif result_type == 'url':
1518             # We have to add extra_info to the results because it may be
1519             # contained in a playlist
1520             return self.extract_info(
1521                 ie_result['url'], download,
1522                 ie_key=ie_result.get('ie_key'),
1523                 extra_info=extra_info)
1524         elif result_type == 'url_transparent':
1525             # Use the information from the embedding page
1526             info = self.extract_info(
1527                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1528                 extra_info=extra_info, download=False, process=False)
1529
1530             # extract_info may return None when ignoreerrors is enabled and
1531             # extraction failed with an error, don't crash and return early
1532             # in this case
1533             if not info:
1534                 return info
1535
1536             force_properties = dict(
1537                 (k, v) for k, v in ie_result.items() if v is not None)
1538             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1539                 if f in force_properties:
1540                     del force_properties[f]
1541             new_result = info.copy()
1542             new_result.update(force_properties)
1543
1544             # Extracted info may not be a video result (i.e.
1545             # info.get('_type', 'video') != video) but rather an url or
1546             # url_transparent. In such cases outer metadata (from ie_result)
1547             # should be propagated to inner one (info). For this to happen
1548             # _type of info should be overridden with url_transparent. This
1549             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1550             if new_result.get('_type') == 'url':
1551                 new_result['_type'] = 'url_transparent'
1552
1553             return self.process_ie_result(
1554                 new_result, download=download, extra_info=extra_info)
1555         elif result_type in ('playlist', 'multi_video'):
1556             # Protect from infinite recursion due to recursively nested playlists
1557             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1558             webpage_url = ie_result['webpage_url']
1559             if webpage_url in self._playlist_urls:
1560                 self.to_screen(
1561                     '[download] Skipping already downloaded playlist: %s'
1562                     % ie_result.get('title') or ie_result.get('id'))
1563                 return
1564
1565             self._playlist_level += 1
1566             self._playlist_urls.add(webpage_url)
1567             self._sanitize_thumbnails(ie_result)
1568             try:
1569                 return self.__process_playlist(ie_result, download)
1570             finally:
1571                 self._playlist_level -= 1
1572                 if not self._playlist_level:
1573                     self._playlist_urls.clear()
1574         elif result_type == 'compat_list':
1575             self.report_warning(
1576                 'Extractor %s returned a compat_list result. '
1577                 'It needs to be updated.' % ie_result.get('extractor'))
1578
1579             def _fixup(r):
1580                 self.add_extra_info(r, {
1581                     'extractor': ie_result['extractor'],
1582                     'webpage_url': ie_result['webpage_url'],
1583                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1584                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1585                     'extractor_key': ie_result['extractor_key'],
1586                 })
1587                 return r
1588             ie_result['entries'] = [
1589                 self.process_ie_result(_fixup(r), download, extra_info)
1590                 for r in ie_result['entries']
1591             ]
1592             return ie_result
1593         else:
1594             raise Exception('Invalid result type: %s' % result_type)
1595
1596     def _ensure_dir_exists(self, path):
1597         return make_dir(path, self.report_error)
1598
1599     def __process_playlist(self, ie_result, download):
1600         # We process each entry in the playlist
1601         playlist = ie_result.get('title') or ie_result.get('id')
1602         self.to_screen('[download] Downloading playlist: %s' % playlist)
1603
1604         if 'entries' not in ie_result:
1605             raise EntryNotInPlaylist('There are no entries')
1606
1607         MissingEntry = object()
1608         incomplete_entries = bool(ie_result.get('requested_entries'))
1609         if incomplete_entries:
1610             def fill_missing_entries(entries, indices):
1611                 ret = [MissingEntry] * max(indices)
1612                 for i, entry in zip(indices, entries):
1613                     ret[i - 1] = entry
1614                 return ret
1615             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1616
1617         playlist_results = []
1618
1619         playliststart = self.params.get('playliststart', 1)
1620         playlistend = self.params.get('playlistend')
1621         # For backwards compatibility, interpret -1 as whole list
1622         if playlistend == -1:
1623             playlistend = None
1624
1625         playlistitems_str = self.params.get('playlist_items')
1626         playlistitems = None
1627         if playlistitems_str is not None:
1628             def iter_playlistitems(format):
1629                 for string_segment in format.split(','):
1630                     if '-' in string_segment:
1631                         start, end = string_segment.split('-')
1632                         for item in range(int(start), int(end) + 1):
1633                             yield int(item)
1634                     else:
1635                         yield int(string_segment)
1636             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1637
1638         ie_entries = ie_result['entries']
1639         if isinstance(ie_entries, list):
1640             playlist_count = len(ie_result)
1641             msg = f'Collected {playlist_count} videos; downloading %d of them'
1642             ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
1643
1644             def get_entry(i):
1645                 return ie_entries[i - 1]
1646         else:
1647             msg = 'Downloading %d videos'
1648             if not isinstance(ie_entries, (PagedList, LazyList)):
1649                 ie_entries = LazyList(ie_entries)
1650
1651             def get_entry(i):
1652                 return YoutubeDL.__handle_extraction_exceptions(
1653                     lambda self, i: ie_entries[i - 1]
1654                 )(self, i)
1655
1656         entries, broken = [], False
1657         items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1658         for i in items:
1659             if i == 0:
1660                 continue
1661             if playlistitems is None and playlistend is not None and playlistend < i:
1662                 break
1663             entry = None
1664             try:
1665                 entry = get_entry(i)
1666                 if entry is MissingEntry:
1667                     raise EntryNotInPlaylist()
1668             except (IndexError, EntryNotInPlaylist):
1669                 if incomplete_entries:
1670                     raise EntryNotInPlaylist(f'Entry {i} cannot be found')
1671                 elif not playlistitems:
1672                     break
1673             entries.append(entry)
1674             try:
1675                 if entry is not None:
1676                     self._match_entry(entry, incomplete=True, silent=True)
1677             except (ExistingVideoReached, RejectedVideoReached):
1678                 broken = True
1679                 break
1680         ie_result['entries'] = entries
1681
1682         # Save playlist_index before re-ordering
1683         entries = [
1684             ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1685             for i, entry in enumerate(entries, 1)
1686             if entry is not None]
1687         n_entries = len(entries)
1688
1689         if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
1690             ie_result['playlist_count'] = n_entries
1691
1692         if not playlistitems and (playliststart != 1 or playlistend):
1693             playlistitems = list(range(playliststart, playliststart + n_entries))
1694         ie_result['requested_entries'] = playlistitems
1695
1696         _infojson_written = False
1697         if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
1698             ie_copy = {
1699                 'playlist': playlist,
1700                 'playlist_id': ie_result.get('id'),
1701                 'playlist_title': ie_result.get('title'),
1702                 'playlist_uploader': ie_result.get('uploader'),
1703                 'playlist_uploader_id': ie_result.get('uploader_id'),
1704                 'playlist_index': 0,
1705                 'n_entries': n_entries,
1706             }
1707             ie_copy.update(dict(ie_result))
1708
1709             _infojson_written = self._write_info_json(
1710                 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1711             if _infojson_written is None:
1712                 return
1713             if self._write_description('playlist', ie_result,
1714                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1715                 return
1716             # TODO: This should be passed to ThumbnailsConvertor if necessary
1717             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1718
1719         if self.params.get('playlistreverse', False):
1720             entries = entries[::-1]
1721         if self.params.get('playlistrandom', False):
1722             random.shuffle(entries)
1723
1724         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1725
1726         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1727         failures = 0
1728         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1729         for i, entry_tuple in enumerate(entries, 1):
1730             playlist_index, entry = entry_tuple
1731             if 'playlist-index' in self.params.get('compat_opts', []):
1732                 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1733             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1734             # This __x_forwarded_for_ip thing is a bit ugly but requires
1735             # minimal changes
1736             if x_forwarded_for:
1737                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1738             extra = {
1739                 'n_entries': n_entries,
1740                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1741                 'playlist_count': ie_result.get('playlist_count'),
1742                 'playlist_index': playlist_index,
1743                 'playlist_autonumber': i,
1744                 'playlist': playlist,
1745                 'playlist_id': ie_result.get('id'),
1746                 'playlist_title': ie_result.get('title'),
1747                 'playlist_uploader': ie_result.get('uploader'),
1748                 'playlist_uploader_id': ie_result.get('uploader_id'),
1749                 'extractor': ie_result['extractor'],
1750                 'webpage_url': ie_result['webpage_url'],
1751                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1752                 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1753                 'extractor_key': ie_result['extractor_key'],
1754             }
1755
1756             if self._match_entry(entry, incomplete=True) is not None:
1757                 continue
1758
1759             entry_result = self.__process_iterable_entry(entry, download, extra)
1760             if not entry_result:
1761                 failures += 1
1762             if failures >= max_failures:
1763                 self.report_error(
1764                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1765                 break
1766             playlist_results.append(entry_result)
1767         ie_result['entries'] = playlist_results
1768
1769         # Write the updated info to json
1770         if _infojson_written and self._write_info_json(
1771                 'updated playlist', ie_result,
1772                 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1773             return
1774
1775         ie_result = self.run_all_pps('playlist', ie_result)
1776         self.to_screen(f'[download] Finished downloading playlist: {playlist}')
1777         return ie_result
1778
1779     @__handle_extraction_exceptions
1780     def __process_iterable_entry(self, entry, download, extra_info):
1781         return self.process_ie_result(
1782             entry, download=download, extra_info=extra_info)
1783
1784     def _build_format_filter(self, filter_spec):
1785         " Returns a function to filter the formats according to the filter_spec "
1786
1787         OPERATORS = {
1788             '<': operator.lt,
1789             '<=': operator.le,
1790             '>': operator.gt,
1791             '>=': operator.ge,
1792             '=': operator.eq,
1793             '!=': operator.ne,
1794         }
1795         operator_rex = re.compile(r'''(?x)\s*
1796             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1797             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1798             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1799             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1800         m = operator_rex.fullmatch(filter_spec)
1801         if m:
1802             try:
1803                 comparison_value = int(m.group('value'))
1804             except ValueError:
1805                 comparison_value = parse_filesize(m.group('value'))
1806                 if comparison_value is None:
1807                     comparison_value = parse_filesize(m.group('value') + 'B')
1808                 if comparison_value is None:
1809                     raise ValueError(
1810                         'Invalid value %r in format specification %r' % (
1811                             m.group('value'), filter_spec))
1812             op = OPERATORS[m.group('op')]
1813
1814         if not m:
1815             STR_OPERATORS = {
1816                 '=': operator.eq,
1817                 '^=': lambda attr, value: attr.startswith(value),
1818                 '$=': lambda attr, value: attr.endswith(value),
1819                 '*=': lambda attr, value: value in attr,
1820             }
1821             str_operator_rex = re.compile(r'''(?x)\s*
1822                 (?P<key>[a-zA-Z0-9._-]+)\s*
1823                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1824                 (?P<value>[a-zA-Z0-9._-]+)\s*
1825                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1826             m = str_operator_rex.fullmatch(filter_spec)
1827             if m:
1828                 comparison_value = m.group('value')
1829                 str_op = STR_OPERATORS[m.group('op')]
1830                 if m.group('negation'):
1831                     op = lambda attr, value: not str_op(attr, value)
1832                 else:
1833                     op = str_op
1834
1835         if not m:
1836             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1837
1838         def _filter(f):
1839             actual_value = f.get(m.group('key'))
1840             if actual_value is None:
1841                 return m.group('none_inclusive')
1842             return op(actual_value, comparison_value)
1843         return _filter
1844
1845     def _check_formats(self, formats):
1846         for f in formats:
1847             self.to_screen('[info] Testing format %s' % f['format_id'])
1848             path = self.get_output_path('temp')
1849             if not self._ensure_dir_exists(f'{path}/'):
1850                 continue
1851             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1852             temp_file.close()
1853             try:
1854                 success, _ = self.dl(temp_file.name, f, test=True)
1855             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1856                 success = False
1857             finally:
1858                 if os.path.exists(temp_file.name):
1859                     try:
1860                         os.remove(temp_file.name)
1861                     except OSError:
1862                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1863             if success:
1864                 yield f
1865             else:
1866                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1867
1868     def _default_format_spec(self, info_dict, download=True):
1869
1870         def can_merge():
1871             merger = FFmpegMergerPP(self)
1872             return merger.available and merger.can_merge()
1873
1874         prefer_best = (
1875             not self.params.get('simulate')
1876             and download
1877             and (
1878                 not can_merge()
1879                 or info_dict.get('is_live', False)
1880                 or self.outtmpl_dict['default'] == '-'))
1881         compat = (
1882             prefer_best
1883             or self.params.get('allow_multiple_audio_streams', False)
1884             or 'format-spec' in self.params.get('compat_opts', []))
1885
1886         return (
1887             'best/bestvideo+bestaudio' if prefer_best
1888             else 'bestvideo*+bestaudio/best' if not compat
1889             else 'bestvideo+bestaudio/best')
1890
1891     def build_format_selector(self, format_spec):
1892         def syntax_error(note, start):
1893             message = (
1894                 'Invalid format specification: '
1895                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1896             return SyntaxError(message)
1897
1898         PICKFIRST = 'PICKFIRST'
1899         MERGE = 'MERGE'
1900         SINGLE = 'SINGLE'
1901         GROUP = 'GROUP'
1902         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1903
1904         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1905                                   'video': self.params.get('allow_multiple_video_streams', False)}
1906
1907         check_formats = self.params.get('check_formats') == 'selected'
1908
1909         def _parse_filter(tokens):
1910             filter_parts = []
1911             for type, string, start, _, _ in tokens:
1912                 if type == tokenize.OP and string == ']':
1913                     return ''.join(filter_parts)
1914                 else:
1915                     filter_parts.append(string)
1916
1917         def _remove_unused_ops(tokens):
1918             # Remove operators that we don't use and join them with the surrounding strings
1919             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1920             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1921             last_string, last_start, last_end, last_line = None, None, None, None
1922             for type, string, start, end, line in tokens:
1923                 if type == tokenize.OP and string == '[':
1924                     if last_string:
1925                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1926                         last_string = None
1927                     yield type, string, start, end, line
1928                     # everything inside brackets will be handled by _parse_filter
1929                     for type, string, start, end, line in tokens:
1930                         yield type, string, start, end, line
1931                         if type == tokenize.OP and string == ']':
1932                             break
1933                 elif type == tokenize.OP and string in ALLOWED_OPS:
1934                     if last_string:
1935                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1936                         last_string = None
1937                     yield type, string, start, end, line
1938                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1939                     if not last_string:
1940                         last_string = string
1941                         last_start = start
1942                         last_end = end
1943                     else:
1944                         last_string += string
1945             if last_string:
1946                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1947
1948         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1949             selectors = []
1950             current_selector = None
1951             for type, string, start, _, _ in tokens:
1952                 # ENCODING is only defined in python 3.x
1953                 if type == getattr(tokenize, 'ENCODING', None):
1954                     continue
1955                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1956                     current_selector = FormatSelector(SINGLE, string, [])
1957                 elif type == tokenize.OP:
1958                     if string == ')':
1959                         if not inside_group:
1960                             # ')' will be handled by the parentheses group
1961                             tokens.restore_last_token()
1962                         break
1963                     elif inside_merge and string in ['/', ',']:
1964                         tokens.restore_last_token()
1965                         break
1966                     elif inside_choice and string == ',':
1967                         tokens.restore_last_token()
1968                         break
1969                     elif string == ',':
1970                         if not current_selector:
1971                             raise syntax_error('"," must follow a format selector', start)
1972                         selectors.append(current_selector)
1973                         current_selector = None
1974                     elif string == '/':
1975                         if not current_selector:
1976                             raise syntax_error('"/" must follow a format selector', start)
1977                         first_choice = current_selector
1978                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1979                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1980                     elif string == '[':
1981                         if not current_selector:
1982                             current_selector = FormatSelector(SINGLE, 'best', [])
1983                         format_filter = _parse_filter(tokens)
1984                         current_selector.filters.append(format_filter)
1985                     elif string == '(':
1986                         if current_selector:
1987                             raise syntax_error('Unexpected "("', start)
1988                         group = _parse_format_selection(tokens, inside_group=True)
1989                         current_selector = FormatSelector(GROUP, group, [])
1990                     elif string == '+':
1991                         if not current_selector:
1992                             raise syntax_error('Unexpected "+"', start)
1993                         selector_1 = current_selector
1994                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1995                         if not selector_2:
1996                             raise syntax_error('Expected a selector', start)
1997                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1998                     else:
1999                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2000                 elif type == tokenize.ENDMARKER:
2001                     break
2002             if current_selector:
2003                 selectors.append(current_selector)
2004             return selectors
2005
2006         def _merge(formats_pair):
2007             format_1, format_2 = formats_pair
2008
2009             formats_info = []
2010             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2011             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2012
2013             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2014                 get_no_more = {'video': False, 'audio': False}
2015                 for (i, fmt_info) in enumerate(formats_info):
2016                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2017                         formats_info.pop(i)
2018                         continue
2019                     for aud_vid in ['audio', 'video']:
2020                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2021                             if get_no_more[aud_vid]:
2022                                 formats_info.pop(i)
2023                                 break
2024                             get_no_more[aud_vid] = True
2025
2026             if len(formats_info) == 1:
2027                 return formats_info[0]
2028
2029             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2030             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2031
2032             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2033             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2034
2035             output_ext = self.params.get('merge_output_format')
2036             if not output_ext:
2037                 if the_only_video:
2038                     output_ext = the_only_video['ext']
2039                 elif the_only_audio and not video_fmts:
2040                     output_ext = the_only_audio['ext']
2041                 else:
2042                     output_ext = 'mkv'
2043
2044             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2045
2046             new_dict = {
2047                 'requested_formats': formats_info,
2048                 'format': '+'.join(filtered('format')),
2049                 'format_id': '+'.join(filtered('format_id')),
2050                 'ext': output_ext,
2051                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2052                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2053                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2054                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2055                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2056             }
2057
2058             if the_only_video:
2059                 new_dict.update({
2060                     'width': the_only_video.get('width'),
2061                     'height': the_only_video.get('height'),
2062                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2063                     'fps': the_only_video.get('fps'),
2064                     'dynamic_range': the_only_video.get('dynamic_range'),
2065                     'vcodec': the_only_video.get('vcodec'),
2066                     'vbr': the_only_video.get('vbr'),
2067                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2068                 })
2069
2070             if the_only_audio:
2071                 new_dict.update({
2072                     'acodec': the_only_audio.get('acodec'),
2073                     'abr': the_only_audio.get('abr'),
2074                     'asr': the_only_audio.get('asr'),
2075                 })
2076
2077             return new_dict
2078
2079         def _check_formats(formats):
2080             if not check_formats:
2081                 yield from formats
2082                 return
2083             yield from self._check_formats(formats)
2084
2085         def _build_selector_function(selector):
2086             if isinstance(selector, list):  # ,
2087                 fs = [_build_selector_function(s) for s in selector]
2088
2089                 def selector_function(ctx):
2090                     for f in fs:
2091                         yield from f(ctx)
2092                 return selector_function
2093
2094             elif selector.type == GROUP:  # ()
2095                 selector_function = _build_selector_function(selector.selector)
2096
2097             elif selector.type == PICKFIRST:  # /
2098                 fs = [_build_selector_function(s) for s in selector.selector]
2099
2100                 def selector_function(ctx):
2101                     for f in fs:
2102                         picked_formats = list(f(ctx))
2103                         if picked_formats:
2104                             return picked_formats
2105                     return []
2106
2107             elif selector.type == MERGE:  # +
2108                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2109
2110                 def selector_function(ctx):
2111                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2112                         yield _merge(pair)
2113
2114             elif selector.type == SINGLE:  # atom
2115                 format_spec = selector.selector or 'best'
2116
2117                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2118                 if format_spec == 'all':
2119                     def selector_function(ctx):
2120                         yield from _check_formats(ctx['formats'][::-1])
2121                 elif format_spec == 'mergeall':
2122                     def selector_function(ctx):
2123                         formats = list(_check_formats(ctx['formats']))
2124                         if not formats:
2125                             return
2126                         merged_format = formats[-1]
2127                         for f in formats[-2::-1]:
2128                             merged_format = _merge((merged_format, f))
2129                         yield merged_format
2130
2131                 else:
2132                     format_fallback, format_reverse, format_idx = False, True, 1
2133                     mobj = re.match(
2134                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2135                         format_spec)
2136                     if mobj is not None:
2137                         format_idx = int_or_none(mobj.group('n'), default=1)
2138                         format_reverse = mobj.group('bw')[0] == 'b'
2139                         format_type = (mobj.group('type') or [None])[0]
2140                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2141                         format_modified = mobj.group('mod') is not None
2142
2143                         format_fallback = not format_type and not format_modified  # for b, w
2144                         _filter_f = (
2145                             (lambda f: f.get('%scodec' % format_type) != 'none')
2146                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2147                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2148                             if format_type  # bv, ba, wv, wa
2149                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2150                             if not format_modified  # b, w
2151                             else lambda f: True)  # b*, w*
2152                         filter_f = lambda f: _filter_f(f) and (
2153                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2154                     else:
2155                         if format_spec in self._format_selection_exts['audio']:
2156                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2157                         elif format_spec in self._format_selection_exts['video']:
2158                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2159                         elif format_spec in self._format_selection_exts['storyboards']:
2160                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2161                         else:
2162                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2163
2164                     def selector_function(ctx):
2165                         formats = list(ctx['formats'])
2166                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2167                         if format_fallback and ctx['incomplete_formats'] and not matches:
2168                             # for extractors with incomplete formats (audio only (soundcloud)
2169                             # or video only (imgur)) best/worst will fallback to
2170                             # best/worst {video,audio}-only format
2171                             matches = formats
2172                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2173                         try:
2174                             yield matches[format_idx - 1]
2175                         except IndexError:
2176                             return
2177
2178             filters = [self._build_format_filter(f) for f in selector.filters]
2179
2180             def final_selector(ctx):
2181                 ctx_copy = dict(ctx)
2182                 for _filter in filters:
2183                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2184                 return selector_function(ctx_copy)
2185             return final_selector
2186
2187         stream = io.BytesIO(format_spec.encode('utf-8'))
2188         try:
2189             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2190         except tokenize.TokenError:
2191             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2192
2193         class TokenIterator(object):
2194             def __init__(self, tokens):
2195                 self.tokens = tokens
2196                 self.counter = 0
2197
2198             def __iter__(self):
2199                 return self
2200
2201             def __next__(self):
2202                 if self.counter >= len(self.tokens):
2203                     raise StopIteration()
2204                 value = self.tokens[self.counter]
2205                 self.counter += 1
2206                 return value
2207
2208             next = __next__
2209
2210             def restore_last_token(self):
2211                 self.counter -= 1
2212
2213         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2214         return _build_selector_function(parsed_selector)
2215
2216     def _calc_headers(self, info_dict):
2217         res = std_headers.copy()
2218
2219         add_headers = info_dict.get('http_headers')
2220         if add_headers:
2221             res.update(add_headers)
2222
2223         cookies = self._calc_cookies(info_dict)
2224         if cookies:
2225             res['Cookie'] = cookies
2226
2227         if 'X-Forwarded-For' not in res:
2228             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2229             if x_forwarded_for_ip:
2230                 res['X-Forwarded-For'] = x_forwarded_for_ip
2231
2232         return res
2233
2234     def _calc_cookies(self, info_dict):
2235         pr = sanitized_Request(info_dict['url'])
2236         self.cookiejar.add_cookie_header(pr)
2237         return pr.get_header('Cookie')
2238
2239     def _sort_thumbnails(self, thumbnails):
2240         thumbnails.sort(key=lambda t: (
2241             t.get('preference') if t.get('preference') is not None else -1,
2242             t.get('width') if t.get('width') is not None else -1,
2243             t.get('height') if t.get('height') is not None else -1,
2244             t.get('id') if t.get('id') is not None else '',
2245             t.get('url')))
2246
2247     def _sanitize_thumbnails(self, info_dict):
2248         thumbnails = info_dict.get('thumbnails')
2249         if thumbnails is None:
2250             thumbnail = info_dict.get('thumbnail')
2251             if thumbnail:
2252                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2253         if not thumbnails:
2254             return
2255
2256         def check_thumbnails(thumbnails):
2257             for t in thumbnails:
2258                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2259                 try:
2260                     self.urlopen(HEADRequest(t['url']))
2261                 except network_exceptions as err:
2262                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2263                     continue
2264                 yield t
2265
2266         self._sort_thumbnails(thumbnails)
2267         for i, t in enumerate(thumbnails):
2268             if t.get('id') is None:
2269                 t['id'] = '%d' % i
2270             if t.get('width') and t.get('height'):
2271                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2272             t['url'] = sanitize_url(t['url'])
2273
2274         if self.params.get('check_formats') is True:
2275             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2276         else:
2277             info_dict['thumbnails'] = thumbnails
2278
2279     def process_video_result(self, info_dict, download=True):
2280         assert info_dict.get('_type', 'video') == 'video'
2281         self._num_videos += 1
2282
2283         if 'id' not in info_dict:
2284             raise ExtractorError('Missing "id" field in extractor result')
2285         if 'title' not in info_dict:
2286             raise ExtractorError('Missing "title" field in extractor result',
2287                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2288
2289         def report_force_conversion(field, field_not, conversion):
2290             self.report_warning(
2291                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2292                 % (field, field_not, conversion))
2293
2294         def sanitize_string_field(info, string_field):
2295             field = info.get(string_field)
2296             if field is None or isinstance(field, compat_str):
2297                 return
2298             report_force_conversion(string_field, 'a string', 'string')
2299             info[string_field] = compat_str(field)
2300
2301         def sanitize_numeric_fields(info):
2302             for numeric_field in self._NUMERIC_FIELDS:
2303                 field = info.get(numeric_field)
2304                 if field is None or isinstance(field, compat_numeric_types):
2305                     continue
2306                 report_force_conversion(numeric_field, 'numeric', 'int')
2307                 info[numeric_field] = int_or_none(field)
2308
2309         sanitize_string_field(info_dict, 'id')
2310         sanitize_numeric_fields(info_dict)
2311
2312         if 'playlist' not in info_dict:
2313             # It isn't part of a playlist
2314             info_dict['playlist'] = None
2315             info_dict['playlist_index'] = None
2316
2317         self._sanitize_thumbnails(info_dict)
2318
2319         thumbnail = info_dict.get('thumbnail')
2320         thumbnails = info_dict.get('thumbnails')
2321         if thumbnail:
2322             info_dict['thumbnail'] = sanitize_url(thumbnail)
2323         elif thumbnails:
2324             info_dict['thumbnail'] = thumbnails[-1]['url']
2325
2326         if info_dict.get('display_id') is None and 'id' in info_dict:
2327             info_dict['display_id'] = info_dict['id']
2328
2329         if info_dict.get('duration') is not None:
2330             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2331
2332         for ts_key, date_key in (
2333                 ('timestamp', 'upload_date'),
2334                 ('release_timestamp', 'release_date'),
2335                 ('modified_timestamp', 'modified_date'),
2336         ):
2337             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2338                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2339                 # see http://bugs.python.org/issue1646728)
2340                 try:
2341                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2342                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2343                 except (ValueError, OverflowError, OSError):
2344                     pass
2345
2346         live_keys = ('is_live', 'was_live')
2347         live_status = info_dict.get('live_status')
2348         if live_status is None:
2349             for key in live_keys:
2350                 if info_dict.get(key) is False:
2351                     continue
2352                 if info_dict.get(key):
2353                     live_status = key
2354                 break
2355             if all(info_dict.get(key) is False for key in live_keys):
2356                 live_status = 'not_live'
2357         if live_status:
2358             info_dict['live_status'] = live_status
2359             for key in live_keys:
2360                 if info_dict.get(key) is None:
2361                     info_dict[key] = (live_status == key)
2362
2363         # Auto generate title fields corresponding to the *_number fields when missing
2364         # in order to always have clean titles. This is very common for TV series.
2365         for field in ('chapter', 'season', 'episode'):
2366             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2367                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2368
2369         for cc_kind in ('subtitles', 'automatic_captions'):
2370             cc = info_dict.get(cc_kind)
2371             if cc:
2372                 for _, subtitle in cc.items():
2373                     for subtitle_format in subtitle:
2374                         if subtitle_format.get('url'):
2375                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2376                         if subtitle_format.get('ext') is None:
2377                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2378
2379         automatic_captions = info_dict.get('automatic_captions')
2380         subtitles = info_dict.get('subtitles')
2381
2382         info_dict['requested_subtitles'] = self.process_subtitles(
2383             info_dict['id'], subtitles, automatic_captions)
2384
2385         if info_dict.get('formats') is None:
2386             # There's only one format available
2387             formats = [info_dict]
2388         else:
2389             formats = info_dict['formats']
2390
2391         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2392         if not self.params.get('allow_unplayable_formats'):
2393             formats = [f for f in formats if not f.get('has_drm')]
2394
2395         if info_dict.get('is_live'):
2396             get_from_start = bool(self.params.get('live_from_start'))
2397             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2398             if not get_from_start:
2399                 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2400
2401         # backward compatibility
2402         info_dict['fulltitle'] = info_dict['title']
2403
2404         if not formats:
2405             self.raise_no_formats(info_dict)
2406
2407         def is_wellformed(f):
2408             url = f.get('url')
2409             if not url:
2410                 self.report_warning(
2411                     '"url" field is missing or empty - skipping format, '
2412                     'there is an error in extractor')
2413                 return False
2414             if isinstance(url, bytes):
2415                 sanitize_string_field(f, 'url')
2416             return True
2417
2418         # Filter out malformed formats for better extraction robustness
2419         formats = list(filter(is_wellformed, formats))
2420
2421         formats_dict = {}
2422
2423         # We check that all the formats have the format and format_id fields
2424         for i, format in enumerate(formats):
2425             sanitize_string_field(format, 'format_id')
2426             sanitize_numeric_fields(format)
2427             format['url'] = sanitize_url(format['url'])
2428             if not format.get('format_id'):
2429                 format['format_id'] = compat_str(i)
2430             else:
2431                 # Sanitize format_id from characters used in format selector expression
2432                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2433             format_id = format['format_id']
2434             if format_id not in formats_dict:
2435                 formats_dict[format_id] = []
2436             formats_dict[format_id].append(format)
2437
2438         # Make sure all formats have unique format_id
2439         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2440         for format_id, ambiguous_formats in formats_dict.items():
2441             ambigious_id = len(ambiguous_formats) > 1
2442             for i, format in enumerate(ambiguous_formats):
2443                 if ambigious_id:
2444                     format['format_id'] = '%s-%d' % (format_id, i)
2445                 if format.get('ext') is None:
2446                     format['ext'] = determine_ext(format['url']).lower()
2447                 # Ensure there is no conflict between id and ext in format selection
2448                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2449                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2450                     format['format_id'] = 'f%s' % format['format_id']
2451
2452         for i, format in enumerate(formats):
2453             if format.get('format') is None:
2454                 format['format'] = '{id} - {res}{note}'.format(
2455                     id=format['format_id'],
2456                     res=self.format_resolution(format),
2457                     note=format_field(format, 'format_note', ' (%s)'),
2458                 )
2459             if format.get('protocol') is None:
2460                 format['protocol'] = determine_protocol(format)
2461             if format.get('resolution') is None:
2462                 format['resolution'] = self.format_resolution(format, default=None)
2463             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2464                 format['dynamic_range'] = 'SDR'
2465             if (info_dict.get('duration') and format.get('tbr')
2466                     and not format.get('filesize') and not format.get('filesize_approx')):
2467                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2468
2469             # Add HTTP headers, so that external programs can use them from the
2470             # json output
2471             full_format_info = info_dict.copy()
2472             full_format_info.update(format)
2473             format['http_headers'] = self._calc_headers(full_format_info)
2474         # Remove private housekeeping stuff
2475         if '__x_forwarded_for_ip' in info_dict:
2476             del info_dict['__x_forwarded_for_ip']
2477
2478         # TODO Central sorting goes here
2479
2480         if self.params.get('check_formats') is True:
2481             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2482
2483         if not formats or formats[0] is not info_dict:
2484             # only set the 'formats' fields if the original info_dict list them
2485             # otherwise we end up with a circular reference, the first (and unique)
2486             # element in the 'formats' field in info_dict is info_dict itself,
2487             # which can't be exported to json
2488             info_dict['formats'] = formats
2489
2490         info_dict, _ = self.pre_process(info_dict)
2491
2492         # The pre-processors may have modified the formats
2493         formats = info_dict.get('formats', [info_dict])
2494
2495         list_only = self.params.get('simulate') is None and (
2496             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2497         interactive_format_selection = not list_only and self.format_selector == '-'
2498         if self.params.get('list_thumbnails'):
2499             self.list_thumbnails(info_dict)
2500         if self.params.get('listsubtitles'):
2501             if 'automatic_captions' in info_dict:
2502                 self.list_subtitles(
2503                     info_dict['id'], automatic_captions, 'automatic captions')
2504             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2505         if self.params.get('listformats') or interactive_format_selection:
2506             self.list_formats(info_dict)
2507         if list_only:
2508             # Without this printing, -F --print-json will not work
2509             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2510             return
2511
2512         format_selector = self.format_selector
2513         if format_selector is None:
2514             req_format = self._default_format_spec(info_dict, download=download)
2515             self.write_debug('Default format spec: %s' % req_format)
2516             format_selector = self.build_format_selector(req_format)
2517
2518         while True:
2519             if interactive_format_selection:
2520                 req_format = input(
2521                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2522                 try:
2523                     format_selector = self.build_format_selector(req_format)
2524                 except SyntaxError as err:
2525                     self.report_error(err, tb=False, is_error=False)
2526                     continue
2527
2528             # While in format selection we may need to have an access to the original
2529             # format set in order to calculate some metrics or do some processing.
2530             # For now we need to be able to guess whether original formats provided
2531             # by extractor are incomplete or not (i.e. whether extractor provides only
2532             # video-only or audio-only formats) for proper formats selection for
2533             # extractors with such incomplete formats (see
2534             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2535             # Since formats may be filtered during format selection and may not match
2536             # the original formats the results may be incorrect. Thus original formats
2537             # or pre-calculated metrics should be passed to format selection routines
2538             # as well.
2539             # We will pass a context object containing all necessary additional data
2540             # instead of just formats.
2541             # This fixes incorrect format selection issue (see
2542             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2543             incomplete_formats = (
2544                 # All formats are video-only or
2545                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2546                 # all formats are audio-only
2547                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2548
2549             ctx = {
2550                 'formats': formats,
2551                 'incomplete_formats': incomplete_formats,
2552             }
2553
2554             formats_to_download = list(format_selector(ctx))
2555             if interactive_format_selection and not formats_to_download:
2556                 self.report_error('Requested format is not available', tb=False, is_error=False)
2557                 continue
2558             break
2559
2560         if not formats_to_download:
2561             if not self.params.get('ignore_no_formats_error'):
2562                 raise ExtractorError('Requested format is not available', expected=True,
2563                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2564             self.report_warning('Requested format is not available')
2565             # Process what we can, even without any available formats.
2566             formats_to_download = [{}]
2567
2568         best_format = formats_to_download[-1]
2569         if download:
2570             if best_format:
2571                 self.to_screen(
2572                     f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2573                     + ', '.join([f['format_id'] for f in formats_to_download]))
2574             max_downloads_reached = False
2575             for i, fmt in enumerate(formats_to_download):
2576                 formats_to_download[i] = new_info = dict(info_dict)
2577                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2578                 new_info.update(fmt)
2579                 new_info['__original_infodict'] = info_dict
2580                 try:
2581                     self.process_info(new_info)
2582                 except MaxDownloadsReached:
2583                     max_downloads_reached = True
2584                 new_info.pop('__original_infodict')
2585                 # Remove copied info
2586                 for key, val in tuple(new_info.items()):
2587                     if info_dict.get(key) == val:
2588                         new_info.pop(key)
2589                 if max_downloads_reached:
2590                     break
2591
2592             write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2593             assert write_archive.issubset({True, False, 'ignore'})
2594             if True in write_archive and False not in write_archive:
2595                 self.record_download_archive(info_dict)
2596
2597             info_dict['requested_downloads'] = formats_to_download
2598             info_dict = self.run_all_pps('after_video', info_dict)
2599             if max_downloads_reached:
2600                 raise MaxDownloadsReached()
2601
2602         # We update the info dict with the selected best quality format (backwards compatibility)
2603         info_dict.update(best_format)
2604         return info_dict
2605
2606     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2607         """Select the requested subtitles and their format"""
2608         available_subs = {}
2609         if normal_subtitles and self.params.get('writesubtitles'):
2610             available_subs.update(normal_subtitles)
2611         if automatic_captions and self.params.get('writeautomaticsub'):
2612             for lang, cap_info in automatic_captions.items():
2613                 if lang not in available_subs:
2614                     available_subs[lang] = cap_info
2615
2616         if (not self.params.get('writesubtitles') and not
2617                 self.params.get('writeautomaticsub') or not
2618                 available_subs):
2619             return None
2620
2621         all_sub_langs = available_subs.keys()
2622         if self.params.get('allsubtitles', False):
2623             requested_langs = all_sub_langs
2624         elif self.params.get('subtitleslangs', False):
2625             # A list is used so that the order of languages will be the same as
2626             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2627             requested_langs = []
2628             for lang_re in self.params.get('subtitleslangs'):
2629                 if lang_re == 'all':
2630                     requested_langs.extend(all_sub_langs)
2631                     continue
2632                 discard = lang_re[0] == '-'
2633                 if discard:
2634                     lang_re = lang_re[1:]
2635                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2636                 if discard:
2637                     for lang in current_langs:
2638                         while lang in requested_langs:
2639                             requested_langs.remove(lang)
2640                 else:
2641                     requested_langs.extend(current_langs)
2642             requested_langs = orderedSet(requested_langs)
2643         elif 'en' in available_subs:
2644             requested_langs = ['en']
2645         else:
2646             requested_langs = [list(all_sub_langs)[0]]
2647         if requested_langs:
2648             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2649
2650         formats_query = self.params.get('subtitlesformat', 'best')
2651         formats_preference = formats_query.split('/') if formats_query else []
2652         subs = {}
2653         for lang in requested_langs:
2654             formats = available_subs.get(lang)
2655             if formats is None:
2656                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2657                 continue
2658             for ext in formats_preference:
2659                 if ext == 'best':
2660                     f = formats[-1]
2661                     break
2662                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2663                 if matches:
2664                     f = matches[-1]
2665                     break
2666             else:
2667                 f = formats[-1]
2668                 self.report_warning(
2669                     'No subtitle format found matching "%s" for language %s, '
2670                     'using %s' % (formats_query, lang, f['ext']))
2671             subs[lang] = f
2672         return subs
2673
2674     def _forceprint(self, tmpl, info_dict):
2675         mobj = re.match(r'\w+(=?)$', tmpl)
2676         if mobj and mobj.group(1):
2677             tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2678         elif mobj:
2679             tmpl = '%({})s'.format(tmpl)
2680         self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2681
2682     def __forced_printings(self, info_dict, filename, incomplete):
2683         def print_mandatory(field, actual_field=None):
2684             if actual_field is None:
2685                 actual_field = field
2686             if (self.params.get('force%s' % field, False)
2687                     and (not incomplete or info_dict.get(actual_field) is not None)):
2688                 self.to_stdout(info_dict[actual_field])
2689
2690         def print_optional(field):
2691             if (self.params.get('force%s' % field, False)
2692                     and info_dict.get(field) is not None):
2693                 self.to_stdout(info_dict[field])
2694
2695         info_dict = info_dict.copy()
2696         if filename is not None:
2697             info_dict['filename'] = filename
2698         if info_dict.get('requested_formats') is not None:
2699             # For RTMP URLs, also include the playpath
2700             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2701         elif 'url' in info_dict:
2702             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2703
2704         if self.params['forceprint'].get('video') or self.params.get('forcejson'):
2705             self.post_extract(info_dict)
2706         for tmpl in self.params['forceprint'].get('video', []):
2707             self._forceprint(tmpl, info_dict)
2708
2709         print_mandatory('title')
2710         print_mandatory('id')
2711         print_mandatory('url', 'urls')
2712         print_optional('thumbnail')
2713         print_optional('description')
2714         print_optional('filename')
2715         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2716             self.to_stdout(formatSeconds(info_dict['duration']))
2717         print_mandatory('format')
2718
2719         if self.params.get('forcejson'):
2720             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2721
2722     def dl(self, name, info, subtitle=False, test=False):
2723         if not info.get('url'):
2724             self.raise_no_formats(info, True)
2725
2726         if test:
2727             verbose = self.params.get('verbose')
2728             params = {
2729                 'test': True,
2730                 'quiet': self.params.get('quiet') or not verbose,
2731                 'verbose': verbose,
2732                 'noprogress': not verbose,
2733                 'nopart': True,
2734                 'skip_unavailable_fragments': False,
2735                 'keep_fragments': False,
2736                 'overwrites': True,
2737                 '_no_ytdl_file': True,
2738             }
2739         else:
2740             params = self.params
2741         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2742         if not test:
2743             for ph in self._progress_hooks:
2744                 fd.add_progress_hook(ph)
2745             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2746             self.write_debug('Invoking downloader on "%s"' % urls)
2747
2748         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2749         # But it may contain objects that are not deep-copyable
2750         new_info = self._copy_infodict(info)
2751         if new_info.get('http_headers') is None:
2752             new_info['http_headers'] = self._calc_headers(new_info)
2753         return fd.download(name, new_info, subtitle)
2754
2755     def process_info(self, info_dict):
2756         """Process a single resolved IE result. (Modified it in-place)"""
2757
2758         assert info_dict.get('_type', 'video') == 'video'
2759         original_infodict = info_dict
2760
2761         if 'format' not in info_dict and 'ext' in info_dict:
2762             info_dict['format'] = info_dict['ext']
2763
2764         if self._match_entry(info_dict) is not None:
2765             info_dict['__write_download_archive'] = 'ignore'
2766             return
2767
2768         self.post_extract(info_dict)
2769         self._num_downloads += 1
2770
2771         # info_dict['_filename'] needs to be set for backward compatibility
2772         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2773         temp_filename = self.prepare_filename(info_dict, 'temp')
2774         files_to_move = {}
2775
2776         # Forced printings
2777         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2778
2779         if self.params.get('simulate'):
2780             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2781             return
2782
2783         if full_filename is None:
2784             return
2785         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2786             return
2787         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2788             return
2789
2790         if self._write_description('video', info_dict,
2791                                    self.prepare_filename(info_dict, 'description')) is None:
2792             return
2793
2794         sub_files = self._write_subtitles(info_dict, temp_filename)
2795         if sub_files is None:
2796             return
2797         files_to_move.update(dict(sub_files))
2798
2799         thumb_files = self._write_thumbnails(
2800             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2801         if thumb_files is None:
2802             return
2803         files_to_move.update(dict(thumb_files))
2804
2805         infofn = self.prepare_filename(info_dict, 'infojson')
2806         _infojson_written = self._write_info_json('video', info_dict, infofn)
2807         if _infojson_written:
2808             info_dict['infojson_filename'] = infofn
2809             # For backward compatibility, even though it was a private field
2810             info_dict['__infojson_filename'] = infofn
2811         elif _infojson_written is None:
2812             return
2813
2814         # Note: Annotations are deprecated
2815         annofn = None
2816         if self.params.get('writeannotations', False):
2817             annofn = self.prepare_filename(info_dict, 'annotation')
2818         if annofn:
2819             if not self._ensure_dir_exists(encodeFilename(annofn)):
2820                 return
2821             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2822                 self.to_screen('[info] Video annotations are already present')
2823             elif not info_dict.get('annotations'):
2824                 self.report_warning('There are no annotations to write.')
2825             else:
2826                 try:
2827                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2828                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2829                         annofile.write(info_dict['annotations'])
2830                 except (KeyError, TypeError):
2831                     self.report_warning('There are no annotations to write.')
2832                 except (OSError, IOError):
2833                     self.report_error('Cannot write annotations file: ' + annofn)
2834                     return
2835
2836         # Write internet shortcut files
2837         def _write_link_file(link_type):
2838             if 'webpage_url' not in info_dict:
2839                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2840                 return False
2841             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2842             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2843                 return False
2844             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2845                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2846                 return True
2847             try:
2848                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2849                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2850                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2851                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2852                     if link_type == 'desktop':
2853                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2854                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2855             except (OSError, IOError):
2856                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2857                 return False
2858             return True
2859
2860         write_links = {
2861             'url': self.params.get('writeurllink'),
2862             'webloc': self.params.get('writewebloclink'),
2863             'desktop': self.params.get('writedesktoplink'),
2864         }
2865         if self.params.get('writelink'):
2866             link_type = ('webloc' if sys.platform == 'darwin'
2867                          else 'desktop' if sys.platform.startswith('linux')
2868                          else 'url')
2869             write_links[link_type] = True
2870
2871         if any(should_write and not _write_link_file(link_type)
2872                for link_type, should_write in write_links.items()):
2873             return
2874
2875         def replace_info_dict(new_info):
2876             nonlocal info_dict
2877             if new_info == info_dict:
2878                 return
2879             info_dict.clear()
2880             info_dict.update(new_info)
2881
2882         try:
2883             new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2884             replace_info_dict(new_info)
2885         except PostProcessingError as err:
2886             self.report_error('Preprocessing: %s' % str(err))
2887             return
2888
2889         if self.params.get('skip_download'):
2890             info_dict['filepath'] = temp_filename
2891             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2892             info_dict['__files_to_move'] = files_to_move
2893             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2894             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2895         else:
2896             # Download
2897             info_dict.setdefault('__postprocessors', [])
2898             try:
2899
2900                 def existing_file(*filepaths):
2901                     ext = info_dict.get('ext')
2902                     final_ext = self.params.get('final_ext', ext)
2903                     existing_files = []
2904                     for file in orderedSet(filepaths):
2905                         if final_ext != ext:
2906                             converted = replace_extension(file, final_ext, ext)
2907                             if os.path.exists(encodeFilename(converted)):
2908                                 existing_files.append(converted)
2909                         if os.path.exists(encodeFilename(file)):
2910                             existing_files.append(file)
2911
2912                     if not existing_files or self.params.get('overwrites', False):
2913                         for file in orderedSet(existing_files):
2914                             self.report_file_delete(file)
2915                             os.remove(encodeFilename(file))
2916                         return None
2917
2918                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2919                     return existing_files[0]
2920
2921                 success = True
2922                 if info_dict.get('requested_formats') is not None:
2923
2924                     def compatible_formats(formats):
2925                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2926                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2927                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2928                         if len(video_formats) > 2 or len(audio_formats) > 2:
2929                             return False
2930
2931                         # Check extension
2932                         exts = set(format.get('ext') for format in formats)
2933                         COMPATIBLE_EXTS = (
2934                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2935                             set(('webm',)),
2936                         )
2937                         for ext_sets in COMPATIBLE_EXTS:
2938                             if ext_sets.issuperset(exts):
2939                                 return True
2940                         # TODO: Check acodec/vcodec
2941                         return False
2942
2943                     requested_formats = info_dict['requested_formats']
2944                     old_ext = info_dict['ext']
2945                     if self.params.get('merge_output_format') is None:
2946                         if not compatible_formats(requested_formats):
2947                             info_dict['ext'] = 'mkv'
2948                             self.report_warning(
2949                                 'Requested formats are incompatible for merge and will be merged into mkv')
2950                         if (info_dict['ext'] == 'webm'
2951                                 and info_dict.get('thumbnails')
2952                                 # check with type instead of pp_key, __name__, or isinstance
2953                                 # since we dont want any custom PPs to trigger this
2954                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2955                             info_dict['ext'] = 'mkv'
2956                             self.report_warning(
2957                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2958                     new_ext = info_dict['ext']
2959
2960                     def correct_ext(filename, ext=new_ext):
2961                         if filename == '-':
2962                             return filename
2963                         filename_real_ext = os.path.splitext(filename)[1][1:]
2964                         filename_wo_ext = (
2965                             os.path.splitext(filename)[0]
2966                             if filename_real_ext in (old_ext, new_ext)
2967                             else filename)
2968                         return '%s.%s' % (filename_wo_ext, ext)
2969
2970                     # Ensure filename always has a correct extension for successful merge
2971                     full_filename = correct_ext(full_filename)
2972                     temp_filename = correct_ext(temp_filename)
2973                     dl_filename = existing_file(full_filename, temp_filename)
2974                     info_dict['__real_download'] = False
2975
2976                     downloaded = []
2977                     merger = FFmpegMergerPP(self)
2978
2979                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
2980                     if dl_filename is not None:
2981                         self.report_file_already_downloaded(dl_filename)
2982                     elif fd:
2983                         for f in requested_formats if fd != FFmpegFD else []:
2984                             f['filepath'] = fname = prepend_extension(
2985                                 correct_ext(temp_filename, info_dict['ext']),
2986                                 'f%s' % f['format_id'], info_dict['ext'])
2987                             downloaded.append(fname)
2988                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2989                         success, real_download = self.dl(temp_filename, info_dict)
2990                         info_dict['__real_download'] = real_download
2991                     else:
2992                         if self.params.get('allow_unplayable_formats'):
2993                             self.report_warning(
2994                                 'You have requested merging of multiple formats '
2995                                 'while also allowing unplayable formats to be downloaded. '
2996                                 'The formats won\'t be merged to prevent data corruption.')
2997                         elif not merger.available:
2998                             self.report_warning(
2999                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
3000                                 'The formats won\'t be merged.')
3001
3002                         if temp_filename == '-':
3003                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3004                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3005                                       else 'but ffmpeg is not installed')
3006                             self.report_warning(
3007                                 f'You have requested downloading multiple formats to stdout {reason}. '
3008                                 'The formats will be streamed one after the other')
3009                             fname = temp_filename
3010                         for f in requested_formats:
3011                             new_info = dict(info_dict)
3012                             del new_info['requested_formats']
3013                             new_info.update(f)
3014                             if temp_filename != '-':
3015                                 fname = prepend_extension(
3016                                     correct_ext(temp_filename, new_info['ext']),
3017                                     'f%s' % f['format_id'], new_info['ext'])
3018                                 if not self._ensure_dir_exists(fname):
3019                                     return
3020                                 f['filepath'] = fname
3021                                 downloaded.append(fname)
3022                             partial_success, real_download = self.dl(fname, new_info)
3023                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3024                             success = success and partial_success
3025
3026                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3027                         info_dict['__postprocessors'].append(merger)
3028                         info_dict['__files_to_merge'] = downloaded
3029                         # Even if there were no downloads, it is being merged only now
3030                         info_dict['__real_download'] = True
3031                     else:
3032                         for file in downloaded:
3033                             files_to_move[file] = None
3034                 else:
3035                     # Just a single file
3036                     dl_filename = existing_file(full_filename, temp_filename)
3037                     if dl_filename is None or dl_filename == temp_filename:
3038                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3039                         # So we should try to resume the download
3040                         success, real_download = self.dl(temp_filename, info_dict)
3041                         info_dict['__real_download'] = real_download
3042                     else:
3043                         self.report_file_already_downloaded(dl_filename)
3044
3045                 dl_filename = dl_filename or temp_filename
3046                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3047
3048             except network_exceptions as err:
3049                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3050                 return
3051             except (OSError, IOError) as err:
3052                 raise UnavailableVideoError(err)
3053             except (ContentTooShortError, ) as err:
3054                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3055                 return
3056
3057             if success and full_filename != '-':
3058
3059                 def fixup():
3060                     do_fixup = True
3061                     fixup_policy = self.params.get('fixup')
3062                     vid = info_dict['id']
3063
3064                     if fixup_policy in ('ignore', 'never'):
3065                         return
3066                     elif fixup_policy == 'warn':
3067                         do_fixup = False
3068                     elif fixup_policy != 'force':
3069                         assert fixup_policy in ('detect_or_warn', None)
3070                         if not info_dict.get('__real_download'):
3071                             do_fixup = False
3072
3073                     def ffmpeg_fixup(cndn, msg, cls):
3074                         if not cndn:
3075                             return
3076                         if not do_fixup:
3077                             self.report_warning(f'{vid}: {msg}')
3078                             return
3079                         pp = cls(self)
3080                         if pp.available:
3081                             info_dict['__postprocessors'].append(pp)
3082                         else:
3083                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3084
3085                     stretched_ratio = info_dict.get('stretched_ratio')
3086                     ffmpeg_fixup(
3087                         stretched_ratio not in (1, None),
3088                         f'Non-uniform pixel ratio {stretched_ratio}',
3089                         FFmpegFixupStretchedPP)
3090
3091                     ffmpeg_fixup(
3092                         (info_dict.get('requested_formats') is None
3093                          and info_dict.get('container') == 'm4a_dash'
3094                          and info_dict.get('ext') == 'm4a'),
3095                         'writing DASH m4a. Only some players support this container',
3096                         FFmpegFixupM4aPP)
3097
3098                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3099                     downloader = downloader.__name__ if downloader else None
3100
3101                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3102                         ffmpeg_fixup(downloader == 'HlsFD',
3103                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3104                                      FFmpegFixupM3u8PP)
3105                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3106                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3107
3108                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3109                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3110
3111                 fixup()
3112                 try:
3113                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3114                 except PostProcessingError as err:
3115                     self.report_error('Postprocessing: %s' % str(err))
3116                     return
3117                 try:
3118                     for ph in self._post_hooks:
3119                         ph(info_dict['filepath'])
3120                 except Exception as err:
3121                     self.report_error('post hooks: %s' % str(err))
3122                     return
3123                 info_dict['__write_download_archive'] = True
3124
3125         if self.params.get('force_write_download_archive'):
3126             info_dict['__write_download_archive'] = True
3127
3128         # Make sure the info_dict was modified in-place
3129         assert info_dict is original_infodict
3130
3131         max_downloads = self.params.get('max_downloads')
3132         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3133             raise MaxDownloadsReached()
3134
3135     def __download_wrapper(self, func):
3136         @functools.wraps(func)
3137         def wrapper(*args, **kwargs):
3138             try:
3139                 res = func(*args, **kwargs)
3140             except UnavailableVideoError as e:
3141                 self.report_error(e)
3142             except MaxDownloadsReached as e:
3143                 self.to_screen(f'[info] {e}')
3144                 raise
3145             except DownloadCancelled as e:
3146                 self.to_screen(f'[info] {e}')
3147                 if not self.params.get('break_per_url'):
3148                     raise
3149             else:
3150                 if self.params.get('dump_single_json', False):
3151                     self.post_extract(res)
3152                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3153         return wrapper
3154
3155     def download(self, url_list):
3156         """Download a given list of URLs."""
3157         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3158         outtmpl = self.outtmpl_dict['default']
3159         if (len(url_list) > 1
3160                 and outtmpl != '-'
3161                 and '%' not in outtmpl
3162                 and self.params.get('max_downloads') != 1):
3163             raise SameFileError(outtmpl)
3164
3165         for url in url_list:
3166             self.__download_wrapper(self.extract_info)(
3167                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3168
3169         return self._download_retcode
3170
3171     def download_with_info_file(self, info_filename):
3172         with contextlib.closing(fileinput.FileInput(
3173                 [info_filename], mode='r',
3174                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3175             # FileInput doesn't have a read method, we can't call json.load
3176             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3177         try:
3178             self.__download_wrapper(self.process_ie_result)(info, download=True)
3179         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3180             if not isinstance(e, EntryNotInPlaylist):
3181                 self.to_stderr('\r')
3182             webpage_url = info.get('webpage_url')
3183             if webpage_url is not None:
3184                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3185                 return self.download([webpage_url])
3186             else:
3187                 raise
3188         return self._download_retcode
3189
3190     @staticmethod
3191     def sanitize_info(info_dict, remove_private_keys=False):
3192         ''' Sanitize the infodict for converting to json '''
3193         if info_dict is None:
3194             return info_dict
3195         info_dict.setdefault('epoch', int(time.time()))
3196         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3197         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3198         if remove_private_keys:
3199             remove_keys |= {
3200                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3201                 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3202             }
3203             reject = lambda k, v: k not in keep_keys and (
3204                 k.startswith('_') or k in remove_keys or v is None)
3205         else:
3206             reject = lambda k, v: k in remove_keys
3207
3208         def filter_fn(obj):
3209             if isinstance(obj, dict):
3210                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3211             elif isinstance(obj, (list, tuple, set, LazyList)):
3212                 return list(map(filter_fn, obj))
3213             elif obj is None or isinstance(obj, (str, int, float, bool)):
3214                 return obj
3215             else:
3216                 return repr(obj)
3217
3218         return filter_fn(info_dict)
3219
3220     @staticmethod
3221     def filter_requested_info(info_dict, actually_filter=True):
3222         ''' Alias of sanitize_info for backward compatibility '''
3223         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3224
3225     @staticmethod
3226     def post_extract(info_dict):
3227         def actual_post_extract(info_dict):
3228             if info_dict.get('_type') in ('playlist', 'multi_video'):
3229                 for video_dict in info_dict.get('entries', {}):
3230                     actual_post_extract(video_dict or {})
3231                 return
3232
3233             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3234             extra = post_extractor().items()
3235             info_dict.update(extra)
3236             info_dict.pop('__post_extractor', None)
3237
3238             original_infodict = info_dict.get('__original_infodict') or {}
3239             original_infodict.update(extra)
3240             original_infodict.pop('__post_extractor', None)
3241
3242         actual_post_extract(info_dict or {})
3243
3244
3245     def run_pp(self, pp, infodict):
3246         files_to_delete = []
3247         if '__files_to_move' not in infodict:
3248             infodict['__files_to_move'] = {}
3249         try:
3250             files_to_delete, infodict = pp.run(infodict)
3251         except PostProcessingError as e:
3252             # Must be True and not 'only_download'
3253             if self.params.get('ignoreerrors') is True:
3254                 self.report_error(e)
3255                 return infodict
3256             raise
3257
3258         if not files_to_delete:
3259             return infodict
3260         if self.params.get('keepvideo', False):
3261             for f in files_to_delete:
3262                 infodict['__files_to_move'].setdefault(f, '')
3263         else:
3264             for old_filename in set(files_to_delete):
3265                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3266                 try:
3267                     os.remove(encodeFilename(old_filename))
3268                 except (IOError, OSError):
3269                     self.report_warning('Unable to remove downloaded original file')
3270                 if old_filename in infodict['__files_to_move']:
3271                     del infodict['__files_to_move'][old_filename]
3272         return infodict
3273
3274     def run_all_pps(self, key, info, *, additional_pps=None):
3275         for tmpl in self.params['forceprint'].get(key, []):
3276             self._forceprint(tmpl, info)
3277         for pp in (additional_pps or []) + self._pps[key]:
3278             info = self.run_pp(info)
3279         return info
3280
3281     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3282         info = dict(ie_info)
3283         info['__files_to_move'] = files_to_move or {}
3284         info = self.run_all_pps(key, info)
3285         return info, info.pop('__files_to_move', None)
3286
3287     def post_process(self, filename, info, files_to_move=None):
3288         """Run all the postprocessors on the given file."""
3289         info['filepath'] = filename
3290         info['__files_to_move'] = files_to_move or {}
3291         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3292         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3293         del info['__files_to_move']
3294         return self.run_all_pps('after_move', info)
3295
3296     def _make_archive_id(self, info_dict):
3297         video_id = info_dict.get('id')
3298         if not video_id:
3299             return
3300         # Future-proof against any change in case
3301         # and backwards compatibility with prior versions
3302         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3303         if extractor is None:
3304             url = str_or_none(info_dict.get('url'))
3305             if not url:
3306                 return
3307             # Try to find matching extractor for the URL and take its ie_key
3308             for ie_key, ie in self._ies.items():
3309                 if ie.suitable(url):
3310                     extractor = ie_key
3311                     break
3312             else:
3313                 return
3314         return '%s %s' % (extractor.lower(), video_id)
3315
3316     def in_download_archive(self, info_dict):
3317         fn = self.params.get('download_archive')
3318         if fn is None:
3319             return False
3320
3321         vid_id = self._make_archive_id(info_dict)
3322         if not vid_id:
3323             return False  # Incomplete video information
3324
3325         return vid_id in self.archive
3326
3327     def record_download_archive(self, info_dict):
3328         fn = self.params.get('download_archive')
3329         if fn is None:
3330             return
3331         vid_id = self._make_archive_id(info_dict)
3332         assert vid_id
3333         self.write_debug(f'Adding to archive: {vid_id}')
3334         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3335             archive_file.write(vid_id + '\n')
3336         self.archive.add(vid_id)
3337
3338     @staticmethod
3339     def format_resolution(format, default='unknown'):
3340         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3341             return 'audio only'
3342         if format.get('resolution') is not None:
3343             return format['resolution']
3344         if format.get('width') and format.get('height'):
3345             return '%dx%d' % (format['width'], format['height'])
3346         elif format.get('height'):
3347             return '%sp' % format['height']
3348         elif format.get('width'):
3349             return '%dx?' % format['width']
3350         return default
3351
3352     def _format_note(self, fdict):
3353         res = ''
3354         if fdict.get('ext') in ['f4f', 'f4m']:
3355             res += '(unsupported)'
3356         if fdict.get('language'):
3357             if res:
3358                 res += ' '
3359             res += '[%s]' % fdict['language']
3360         if fdict.get('format_note') is not None:
3361             if res:
3362                 res += ' '
3363             res += fdict['format_note']
3364         if fdict.get('tbr') is not None:
3365             if res:
3366                 res += ', '
3367             res += '%4dk' % fdict['tbr']
3368         if fdict.get('container') is not None:
3369             if res:
3370                 res += ', '
3371             res += '%s container' % fdict['container']
3372         if (fdict.get('vcodec') is not None
3373                 and fdict.get('vcodec') != 'none'):
3374             if res:
3375                 res += ', '
3376             res += fdict['vcodec']
3377             if fdict.get('vbr') is not None:
3378                 res += '@'
3379         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3380             res += 'video@'
3381         if fdict.get('vbr') is not None:
3382             res += '%4dk' % fdict['vbr']
3383         if fdict.get('fps') is not None:
3384             if res:
3385                 res += ', '
3386             res += '%sfps' % fdict['fps']
3387         if fdict.get('acodec') is not None:
3388             if res:
3389                 res += ', '
3390             if fdict['acodec'] == 'none':
3391                 res += 'video only'
3392             else:
3393                 res += '%-5s' % fdict['acodec']
3394         elif fdict.get('abr') is not None:
3395             if res:
3396                 res += ', '
3397             res += 'audio'
3398         if fdict.get('abr') is not None:
3399             res += '@%3dk' % fdict['abr']
3400         if fdict.get('asr') is not None:
3401             res += ' (%5dHz)' % fdict['asr']
3402         if fdict.get('filesize') is not None:
3403             if res:
3404                 res += ', '
3405             res += format_bytes(fdict['filesize'])
3406         elif fdict.get('filesize_approx') is not None:
3407             if res:
3408                 res += ', '
3409             res += '~' + format_bytes(fdict['filesize_approx'])
3410         return res
3411
3412     def _list_format_headers(self, *headers):
3413         if self.params.get('listformats_table', True) is not False:
3414             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3415         return headers
3416
    def list_formats(self, info_dict):
        """Print the formats available for info_dict as a table on stdout.

        If info_dict has neither 'formats' nor 'url', only a notice is printed.
        The detailed multi-column table is used unless the 'listformats_table'
        param is False, in which case a 4-column legacy table is printed.
        Formats whose 'preference' is below -1000 are left out of both tables.
        """
        if not info_dict.get('formats') and not info_dict.get('url'):
            self.to_screen('%s has no formats' % info_dict['id'])
            return
        self.to_screen('[info] Available formats for %s:' % info_dict['id'])

        # Without a 'formats' key, the info_dict itself is listed as the only format
        formats = info_dict.get('formats', [info_dict])
        new_format = self.params.get('listformats_table', True) is not False
        if new_format:
            # Column separator; '|' is passed as the fallback character
            delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
            # One row per format; the cells must stay in the same order as header_line below
            table = [
                [
                    self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
                    format_field(f, 'ext'),
                    format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
                    format_field(f, 'fps', '\t%d'),
                    format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
                    delim,
                    format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
                    format_field(f, 'tbr', '\t%dk'),
                    shorten_protocol_name(f.get('protocol', '')),
                    delim,
                    # A vcodec of 'none' is shown as 'images' when there is no audio either,
                    # otherwise as 'audio only'
                    format_field(f, 'vcodec', default='unknown').replace(
                        'none',
                        'images' if f.get('acodec') == 'none'
                        else self._format_screen('audio only', self.Styles.SUPPRESS)),
                    format_field(f, 'vbr', '\t%dk'),
                    # An acodec of 'none' is left blank when there is no video either,
                    # otherwise shown as 'video only'
                    format_field(f, 'acodec', default='unknown').replace(
                        'none',
                        '' if f.get('vcodec') == 'none'
                        else self._format_screen('video only', self.Styles.SUPPRESS)),
                    format_field(f, 'abr', '\t%dk'),
                    format_field(f, 'asr', '\t%dHz'),
                    join_nonempty(
                        self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                        format_field(f, 'language', '[%s]'),
                        join_nonempty(
                            format_field(f, 'format_note'),
                            # The container is ignored when it equals the extension
                            format_field(f, 'container', ignore=(None, f.get('ext'))),
                            delim=', '),
                        delim=' '),
                ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
            header_line = self._list_format_headers(
                'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
                delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
        else:
            # Legacy layout: id, extension, resolution and a free-text note
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)]
                for f in formats
                if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['format code', 'extension', 'resolution', 'note']

        # In the legacy layout delim is False (new_format and ...) and extra_gap is 1
        self.to_stdout(render_table(
            header_line, table,
            extra_gap=(0 if new_format else 1),
            hide_empty=new_format,
            delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3478
3479     def list_thumbnails(self, info_dict):
3480         thumbnails = list(info_dict.get('thumbnails'))
3481         if not thumbnails:
3482             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3483             return
3484
3485         self.to_screen(
3486             '[info] Thumbnails for %s:' % info_dict['id'])
3487         self.to_stdout(render_table(
3488             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3489             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3490
3491     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3492         if not subtitles:
3493             self.to_screen('%s has no %s' % (video_id, name))
3494             return
3495         self.to_screen(
3496             'Available %s for %s:' % (name, video_id))
3497
3498         def _row(lang, formats):
3499             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3500             if len(set(names)) == 1:
3501                 names = [] if names[0] == 'unknown' else names[:1]
3502             return [lang, ', '.join(names), ', '.join(exts)]
3503
3504         self.to_stdout(render_table(
3505             self._list_format_headers('Language', 'Name', 'Formats'),
3506             [_row(lang, formats) for lang, formats in subtitles.items()],
3507             hide_empty=True))
3508
3509     def urlopen(self, req):
3510         """ Start an HTTP download """
3511         if isinstance(req, compat_basestring):
3512             req = sanitized_Request(req)
3513         return self._opener.open(req, timeout=self._socket_timeout)
3514
    def print_debug_header(self):
        """Write version, environment and configuration details as debug messages.

        Does nothing unless the 'verbose' param is set. The messages go to the
        'logger' param when one is given, otherwise they are written directly.
        """
        if not self.params.get('verbose'):
            return

        def get_encoding(stream):
            """Describe the encoding of stream, noting when terminal sequences are unsupported"""
            ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
            if not supports_terminal_sequences(stream):
                from .compat import WINDOWS_VT_MODE
                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
            return ret

        encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            get_encoding(self._screen_file), get_encoding(self._err_file),
            self.get_encoding())

        # The encodings line is written first; without a logger it goes through
        # write_string with encoding=None, while all later lines use self._write_string
        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        write_debug(join_nonempty(
            'yt-dlp version', __version__,
            f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            delim=' '))
        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if plugin_extractors or plugin_postprocessors:
            # Each plugin is listed by class name, with ' as <name>' appended
            # when it is registered under a different name
            write_debug('Plugins: %s' % [
                '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
        if self.params.get('compat_opts'):
            write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

        # When detect_variant() reports 'source', ask git (run in this file's
        # directory) for the current HEAD. This is best-effort: any failure is ignored
        if source == 'source':
            try:
                sp = Popen(
                    ['git', 'rev-parse', '--short', 'HEAD'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                    cwd=os.path.dirname(os.path.abspath(__file__)))
                out, err = sp.communicate_or_kill()
                out = out.decode().strip()
                # re.match only anchors at the start, so this checks for a hex prefix
                if re.match('[0-9a-f]+', out):
                    write_debug('Git HEAD: %s' % out)
            except Exception:
                # sys.exc_clear only exists on Python 2; on Python 3 the
                # resulting AttributeError is swallowed by the handler below
                try:
                    sys.exc_clear()
                except Exception:
                    pass

        def python_implementation():
            """Return the implementation name, with the PyPy version appended when available"""
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        write_debug('Python version %s (%s %s) - %s' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        # Keep only the names of the features whose value is truthy
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        # Executables with a falsy version are left out of the listing
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .downloader.websocket import has_websockets
        from .postprocessor.embedthumbnail import has_mutagen
        from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE

        # Each entry evaluates to the library name when its flag is truthy
        lib_str = join_nonempty(
            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
            SECRETSTORAGE_AVAILABLE and 'secretstorage',
            has_mutagen and 'mutagen',
            SQLITE_AVAILABLE and 'sqlite',
            has_websockets and 'websockets',
            delim=', ') or 'none'
        write_debug('Optional libraries: %s' % lib_str)

        # Merge the 'proxies' mapping of every opener handler that has one
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        # Not implemented
        # (the 'False and' makes this branch unreachable)
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3628
3629     def _setup_opener(self):
3630         timeout_val = self.params.get('socket_timeout')
3631         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3632
3633         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3634         opts_cookiefile = self.params.get('cookiefile')
3635         opts_proxy = self.params.get('proxy')
3636
3637         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3638
3639         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3640         if opts_proxy is not None:
3641             if opts_proxy == '':
3642                 proxies = {}
3643             else:
3644                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3645         else:
3646             proxies = compat_urllib_request.getproxies()
3647             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3648             if 'http' in proxies and 'https' not in proxies:
3649                 proxies['https'] = proxies['http']
3650         proxy_handler = PerRequestProxyHandler(proxies)
3651
3652         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3653         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3654         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3655         redirect_handler = YoutubeDLRedirectHandler()
3656         data_handler = compat_urllib_request_DataHandler()
3657
3658         # When passing our own FileHandler instance, build_opener won't add the
3659         # default FileHandler and allows us to disable the file protocol, which
3660         # can be used for malicious purposes (see
3661         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3662         file_handler = compat_urllib_request.FileHandler()
3663
3664         def file_open(*args, **kwargs):
3665             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3666         file_handler.file_open = file_open
3667
3668         opener = compat_urllib_request.build_opener(
3669             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3670
3671         # Delete the default user-agent header, which would otherwise apply in
3672         # cases where our custom HTTP handler doesn't come into play
3673         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3674         opener.addheaders = []
3675         self._opener = opener
3676
3677     def encode(self, s):
3678         if isinstance(s, bytes):
3679             return s  # Already encoded
3680
3681         try:
3682             return s.encode(self.get_encoding())
3683         except UnicodeEncodeError as err:
3684             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3685             raise
3686
3687     def get_encoding(self):
3688         encoding = self.params.get('encoding')
3689         if encoding is None:
3690             encoding = preferredencoding()
3691         return encoding
3692
3693     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3694         ''' Write infojson and returns True = written, False = skip, None = error '''
3695         if overwrite is None:
3696             overwrite = self.params.get('overwrites', True)
3697         if not self.params.get('writeinfojson'):
3698             return False
3699         elif not infofn:
3700             self.write_debug(f'Skipping writing {label} infojson')
3701             return False
3702         elif not self._ensure_dir_exists(infofn):
3703             return None
3704         elif not overwrite and os.path.exists(infofn):
3705             self.to_screen(f'[info] {label.title()} metadata is already present')
3706         else:
3707             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3708             try:
3709                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3710             except (OSError, IOError):
3711                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3712                 return None
3713         return True
3714
3715     def _write_description(self, label, ie_result, descfn):
3716         ''' Write description and returns True = written, False = skip, None = error '''
3717         if not self.params.get('writedescription'):
3718             return False
3719         elif not descfn:
3720             self.write_debug(f'Skipping writing {label} description')
3721             return False
3722         elif not self._ensure_dir_exists(descfn):
3723             return None
3724         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3725             self.to_screen(f'[info] {label.title()} description is already present')
3726         elif ie_result.get('description') is None:
3727             self.report_warning(f'There\'s no {label} description to write')
3728             return False
3729         else:
3730             try:
3731                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3732                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3733                     descfile.write(ie_result['description'])
3734             except (OSError, IOError):
3735                 self.report_error(f'Cannot write {label} description file {descfn}')
3736                 return None
3737         return True
3738
3739     def _write_subtitles(self, info_dict, filename):
3740         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3741         ret = []
3742         subtitles = info_dict.get('requested_subtitles')
3743         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3744             # subtitles download errors are already managed as troubles in relevant IE
3745             # that way it will silently go on when used with unsupporting IE
3746             return ret
3747
3748         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3749         if not sub_filename_base:
3750             self.to_screen('[info] Skipping writing video subtitles')
3751             return ret
3752         for sub_lang, sub_info in subtitles.items():
3753             sub_format = sub_info['ext']
3754             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3755             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3756             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3757                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3758                 sub_info['filepath'] = sub_filename
3759                 ret.append((sub_filename, sub_filename_final))
3760                 continue
3761
3762             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3763             if sub_info.get('data') is not None:
3764                 try:
3765                     # Use newline='' to prevent conversion of newline characters
3766                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3767                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3768                         subfile.write(sub_info['data'])
3769                     sub_info['filepath'] = sub_filename
3770                     ret.append((sub_filename, sub_filename_final))
3771                     continue
3772                 except (OSError, IOError):
3773                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3774                     return None
3775
3776             try:
3777                 sub_copy = sub_info.copy()
3778                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3779                 self.dl(sub_filename, sub_copy, subtitle=True)
3780                 sub_info['filepath'] = sub_filename
3781                 ret.append((sub_filename, sub_filename_final))
3782             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3783                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3784                     raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
3785                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3786         return ret
3787
3788     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3789         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3790         write_all = self.params.get('write_all_thumbnails', False)
3791         thumbnails, ret = [], []
3792         if write_all or self.params.get('writethumbnail', False):
3793             thumbnails = info_dict.get('thumbnails') or []
3794         multiple = write_all and len(thumbnails) > 1
3795
3796         if thumb_filename_base is None:
3797             thumb_filename_base = filename
3798         if thumbnails and not thumb_filename_base:
3799             self.write_debug(f'Skipping writing {label} thumbnail')
3800             return ret
3801
3802         for idx, t in list(enumerate(thumbnails))[::-1]:
3803             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3804             thumb_display_id = f'{label} thumbnail {t["id"]}'
3805             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3806             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3807
3808             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3809                 ret.append((thumb_filename, thumb_filename_final))
3810                 t['filepath'] = thumb_filename
3811                 self.to_screen('[info] %s is already present' % (
3812                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3813             else:
3814                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3815                 try:
3816                     uf = self.urlopen(t['url'])
3817                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3818                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3819                         shutil.copyfileobj(uf, thumbf)
3820                     ret.append((thumb_filename, thumb_filename_final))
3821                     t['filepath'] = thumb_filename
3822                 except network_exceptions as err:
3823                     thumbnails.pop(idx)
3824                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3825             if ret and not write_all:
3826                 break
3827         return ret