yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_pycrypto_AES,
  40     compat_shlex_quote,
  41     compat_str,
  42     compat_tokenize_tokenize,
  43     compat_urllib_error,
  44     compat_urllib_request,
  45     compat_urllib_request_DataHandler,
  46     windows_enable_vt_mode,
  47 )
  48 from .cookies import load_cookies
  49 from .utils import (
  50     age_restricted,
  51     args_to_str,
  52     ContentTooShortError,
  53     date_from_str,
  54     DateRange,
  55     DEFAULT_OUTTMPL,
  56     determine_ext,
  57     determine_protocol,
  58     DownloadCancelled,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     format_decimal_suffix,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     get_domain,
  74     HEADRequest,
  75     int_or_none,
  76     iri_to_uri,
  77     ISO3166Utils,
  78     join_nonempty,
  79     LazyList,
  80     LINK_TEMPLATES,
  81     locked_file,
  82     make_dir,
  83     make_HTTPS_handler,
  84     MaxDownloadsReached,
  85     network_exceptions,
  86     number_of_digits,
  87     orderedSet,
  88     OUTTMPL_TYPES,
  89     PagedList,
  90     parse_filesize,
  91     PerRequestProxyHandler,
  92     platform_name,
  93     Popen,
  94     POSTPROCESS_WHEN,
  95     PostProcessingError,
  96     preferredencoding,
  97     prepend_extension,
  98     ReExtractInfo,
  99     register_socks_protocols,
 100     RejectedVideoReached,
 101     remove_terminal_sequences,
 102     render_table,
 103     replace_extension,
 104     SameFileError,
 105     sanitize_filename,
 106     sanitize_path,
 107     sanitize_url,
 108     sanitized_Request,
 109     std_headers,
 110     STR_FORMAT_RE_TMPL,
 111     STR_FORMAT_TYPES,
 112     str_or_none,
 113     strftime_or_none,
 114     subtitles_filename,
 115     supports_terminal_sequences,
 116     timetuple_from_msec,
 117     to_high_limit_path,
 118     traverse_obj,
 119     try_get,
 120     UnavailableVideoError,
 121     url_basename,
 122     variadic,
 123     version_tuple,
 124     write_json_file,
 125     write_string,
 126     YoutubeDLCookieProcessor,
 127     YoutubeDLHandler,
 128     YoutubeDLRedirectHandler,
 129 )
 130 from .cache import Cache
 131 from .minicurses import format_text
 132 from .extractor import (
 133     gen_extractor_classes,
 134     get_info_extractor,
 135     _LAZY_LOADER,
 136     _PLUGIN_CLASSES as plugin_extractors
 137 )
 138 from .extractor.openload import PhantomJSwrapper
 139 from .downloader import (
 140     FFmpegFD,
 141     get_suitable_downloader,
 142     shorten_protocol_name
 143 )
 144 from .downloader.rtmp import rtmpdump_version
 145 from .postprocessor import (
 146     get_postprocessor,
 147     EmbedThumbnailPP,
 148     FFmpegFixupDuplicateMoovPP,
 149     FFmpegFixupDurationPP,
 150     FFmpegFixupM3u8PP,
 151     FFmpegFixupM4aPP,
 152     FFmpegFixupStretchedPP,
 153     FFmpegFixupTimestampPP,
 154     FFmpegMergerPP,
 155     FFmpegPostProcessor,
 156     MoveFilesAfterDownloadPP,
 157     _PLUGIN_CLASSES as plugin_postprocessors
 158 )
 159 from .update import detect_variant
 160 from .version import __version__, RELEASE_GIT_HEAD
 161
 162 if compat_os_name == 'nt':
 163     import ctypes
 164
 165
 166 class YoutubeDL(object):
 167     """YoutubeDL class.
 168
 169     YoutubeDL objects are the ones responsible for downloading the
 170     actual video file and writing it to disk if the user has requested
 171     it, among some other tasks. In most cases there should be one per
 172     program. As, given a video URL, the downloader doesn't know how to
 173     extract all the needed information, task that InfoExtractors do, it
 174     has to pass the URL to one of them.
 175
 176     For this, YoutubeDL objects have a method that allows
 177     InfoExtractors to be registered in a given order. When it is passed
 178     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 179     finds that reports being able to handle it. The InfoExtractor extracts
 180     all the information about the video or videos the URL refers to, and
 181     YoutubeDL processes the extracted information, possibly using a File
 182     Downloader to download the video.
 183
 184     YoutubeDL objects accept a lot of parameters. In order not to saturate
 185     the object constructor with arguments, it receives a dictionary of
 186     options instead. These options are available through the params
 187     attribute for the InfoExtractors to use. The YoutubeDL also
 188     registers itself as the downloader in charge for the InfoExtractors
 189     that are added to it, so this is a "mutual registration".
 190
 191     Available options:
 192
 193     username:          Username for authentication purposes.
 194     password:          Password for authentication purposes.
 195     videopassword:     Password for accessing a video.
 196     ap_mso:            Adobe Pass multiple-system operator identifier.
 197     ap_username:       Multiple-system operator account username.
 198     ap_password:       Multiple-system operator account password.
 199     usenetrc:          Use netrc for authentication instead.
 200     verbose:           Print additional info to stdout.
 201     quiet:             Do not print messages to stdout.
 202     no_warnings:       Do not print out anything for warnings.
 203     forceprint:        A dict with keys video/playlist mapped to
 204                        a list of templates to force print to stdout
 205                        For compatibility, a single list is also accepted
 206     forceurl:          Force printing final URL. (Deprecated)
 207     forcetitle:        Force printing title. (Deprecated)
 208     forceid:           Force printing ID. (Deprecated)
 209     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 210     forcedescription:  Force printing description. (Deprecated)
 211     forcefilename:     Force printing final filename. (Deprecated)
 212     forceduration:     Force printing duration. (Deprecated)
 213     forcejson:         Force printing info_dict as JSON.
 214     dump_single_json:  Force printing the info_dict of the whole playlist
 215                        (or video) as a single JSON line.
 216     force_write_download_archive: Force writing download archive regardless
 217                        of 'skip_download' or 'simulate'.
 218     simulate:          Do not download the video files. If unset (or None),
 219                        simulate only if listsubtitles, listformats or list_thumbnails is used
 220     format:            Video format code. See "FORMAT SELECTION" for more details.
 221                        You can also pass a function. The function takes 'ctx' as
 222                        argument and returns the formats to download.
 223                        See "build_format_selector" for an implementation
 224     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 225     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 226                        extracting metadata even if the video is not actually
 227                        available for download (experimental)
 228     format_sort:       A list of fields by which to sort the video formats.
 229                        See "Sorting Formats" for more details.
 230     format_sort_force: Force the given format_sort. See "Sorting Formats"
 231                        for more details.
 232     allow_multiple_video_streams:   Allow multiple video streams to be merged
 233                        into a single file
 234     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 235                        into a single file
 236     check_formats:     Whether to test if the formats are downloadable.
 237                        Can be True (check all), False (check none),
 238                        'selected' (check selected formats),
 239                        or None (check only if requested by extractor)
 240     paths:             Dictionary of output paths. The allowed keys are 'home'
 241                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 242     outtmpl:           Dictionary of templates for output names. Allowed keys
 243                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 244                        For compatibility with youtube-dl, a single string can also be used
 245     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 246     restrictfilenames: Do not allow "&" and spaces in file names
 247     trim_file_name:    Limit length of filename (extension excluded)
 248     windowsfilenames:  Force the filenames to be windows compatible
 249     ignoreerrors:      Do not stop on download/postprocessing errors.
 250                        Can be 'only_download' to ignore only download errors.
 251                        Default is 'only_download' for CLI, but False for API
 252     skip_playlist_after_errors: Number of allowed failures until the rest of
 253                        the playlist is skipped
 254     force_generic_extractor: Force downloader to use the generic extractor
 255     overwrites:        Overwrite all video and metadata files if True,
 256                        overwrite only non-video files if None
 257                        and don't overwrite any file if False
 258                        For compatibility with youtube-dl,
 259                        "nooverwrites" may also be used instead
 260     playliststart:     Playlist item to start at.
 261     playlistend:       Playlist item to end at.
 262     playlist_items:    Specific indices of playlist to download.
 263     playlistreverse:   Download playlist items in reverse order.
 264     playlistrandom:    Download playlist items in random order.
 265     matchtitle:        Download only matching titles.
 266     rejecttitle:       Reject downloads for matching titles.
 267     logger:            Log messages to a logging.Logger instance.
 268     logtostderr:       Log messages to stderr instead of stdout.
 269     consoletitle:       Display progress in console window's titlebar.
 270     writedescription:  Write the video description to a .description file
 271     writeinfojson:     Write the video metadata to a .info.json file
 272     clean_infojson:    Remove private fields from the infojson
 273     getcomments:       Extract video comments. This will not be written to disk
 274                        unless writeinfojson is also given
 275     writeannotations:  Write the video annotations to a .annotations.xml file
 276     writethumbnail:    Write the thumbnail image to a file
 277     allow_playlist_files: Whether to write playlists' description, infojson etc
 278                        also to disk when using the 'write*' options
 279     write_all_thumbnails:  Write all thumbnail formats to files
 280     writelink:         Write an internet shortcut file, depending on the
 281                        current platform (.url/.webloc/.desktop)
 282     writeurllink:      Write a Windows internet shortcut file (.url)
 283     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 284     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 285     writesubtitles:    Write the video subtitles to a file
 286     writeautomaticsub: Write the automatically generated subtitles to a file
 287     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 288                        Downloads all the subtitles of the video
 289                        (requires writesubtitles or writeautomaticsub)
 290     listsubtitles:     Lists all available subtitles for the video
 291     subtitlesformat:   The format code for subtitles
 292     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 293                        The list may contain "all" to refer to all the available
 294                        subtitles. The language can be prefixed with a "-" to
 295                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 296     keepvideo:         Keep the video file after post-processing
 297     daterange:         A DateRange object, download only if the upload_date is in the range.
 298     skip_download:     Skip the actual download of the video file
 299     cachedir:          Location of the cache files in the filesystem.
 300                        False to disable filesystem cache.
 301     noplaylist:        Download single video instead of a playlist if in doubt.
 302     age_limit:         An integer representing the user's age in years.
 303                        Unsuitable videos for the given age are skipped.
 304     min_views:         An integer representing the minimum view count the video
 305                        must have in order to not be skipped.
 306                        Videos without view count information are always
 307                        downloaded. None for no limit.
 308     max_views:         An integer representing the maximum view count.
 309                        Videos that are more popular than that are not
 310                        downloaded.
 311                        Videos without view count information are always
 312                        downloaded. None for no limit.
 313     download_archive:  File name of a file where all downloads are recorded.
 314                        Videos already present in the file are not downloaded
 315                        again.
 316     break_on_existing: Stop the download process after attempting to download a
 317                        file that is in the archive.
 318     break_on_reject:   Stop the download process when encountering a video that
 319                        has been filtered out.
 320     break_per_url:     Whether break_on_reject and break_on_existing
 321                        should act on each input URL as opposed to for the entire queue
 322     cookiefile:        File name where cookies should be read from and dumped to
 323     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 324                        name/path from where cookies are loaded, and the name of the
 325                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 326     nocheckcertificate:  Do not verify SSL certificates
 327     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 328                        At the moment, this is only supported by YouTube.
 329     proxy:             URL of the proxy server to use
 330     geo_verification_proxy:  URL of the proxy to use for IP address verification
 331                        on geo-restricted sites.
 332     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 333     bidi_workaround:   Work around buggy terminals without bidirectional text
 334                        support, using fribidi
 335     debug_printtraffic:Print out sent and received HTTP traffic
 336     include_ads:       Download ads as well (deprecated)
 337     default_search:    Prepend this string if an input url is not valid.
 338                        'auto' for elaborate guessing
 339     encoding:          Use this encoding instead of the system-specified.
 340     extract_flat:      Do not resolve URLs, return the immediate result.
 341                        Pass in 'in_playlist' to only show this behavior for
 342                        playlist items.
 343     wait_for_video:    If given, wait for scheduled streams to become available.
 344                        The value should be a tuple containing the range
 345                        (min_secs, max_secs) to wait between retries
 346     postprocessors:    A list of dictionaries, each with an entry
 347                        * key:  The name of the postprocessor. See
 348                                yt_dlp/postprocessor/__init__.py for a list.
 349                        * when: When to run the postprocessor. Can be one of
 350                                pre_process|before_dl|post_process|after_move.
 351                                Assumed to be 'post_process' if not given
 352     post_hooks:        Deprecated - Register a custom postprocessor instead
 353                        A list of functions that get called as the final step
 354                        for each video file, after all postprocessors have been
 355                        called. The filename will be passed as the only argument.
 356     progress_hooks:    A list of functions that get called on download
 357                        progress, with a dictionary with the entries
 358                        * status: One of "downloading", "error", or "finished".
 359                                  Check this first and ignore unknown values.
 360                        * info_dict: The extracted info_dict
 361
 362                        If status is one of "downloading", or "finished", the
 363                        following properties may also be present:
 364                        * filename: The final filename (always present)
 365                        * tmpfilename: The filename we're currently writing to
 366                        * downloaded_bytes: Bytes on disk
 367                        * total_bytes: Size of the whole file, None if unknown
 368                        * total_bytes_estimate: Guess of the eventual file size,
 369                                                None if unavailable.
 370                        * elapsed: The number of seconds since download started.
 371                        * eta: The estimated time in seconds, None if unknown
 372                        * speed: The download speed in bytes/second, None if
 373                                 unknown
 374                        * fragment_index: The counter of the currently
 375                                          downloaded video fragment.
 376                        * fragment_count: The number of fragments (= individual
 377                                          files that will be merged)
 378
 379                        Progress hooks are guaranteed to be called at least once
 380                        (with status "finished") if the download is successful.
 381     postprocessor_hooks:  A list of functions that get called on postprocessing
 382                        progress, with a dictionary with the entries
 383                        * status: One of "started", "processing", or "finished".
 384                                  Check this first and ignore unknown values.
 385                        * postprocessor: Name of the postprocessor
 386                        * info_dict: The extracted info_dict
 387
 388                        Postprocessor hooks are guaranteed to be called at least twice
 389                        (with status "started" and "finished") if the processing is successful.
 390     merge_output_format: Extension to use when merging formats.
 391     final_ext:         Expected final extension; used to detect when the file was
 392                        already downloaded and converted
 393     fixup:             Automatically correct known faults of the file.
 394                        One of:
 395                        - "never": do nothing
 396                        - "warn": only emit a warning
 397                        - "detect_or_warn": check whether we can do anything
 398                                            about it, warn otherwise (default)
 399     source_address:    Client-side IP address to bind to.
 400     call_home:         Boolean, true iff we are allowed to contact the
 401                        yt-dlp servers for debugging. (BROKEN)
 402     sleep_interval_requests: Number of seconds to sleep between requests
 403                        during extraction
 404     sleep_interval:    Number of seconds to sleep before each download when
 405                        used alone or a lower bound of a range for randomized
 406                        sleep before each download (minimum possible number
 407                        of seconds to sleep) when used along with
 408                        max_sleep_interval.
 409     max_sleep_interval:Upper bound of a range for randomized sleep before each
 410                        download (maximum possible number of seconds to sleep).
 411                        Must only be used along with sleep_interval.
 412                        Actual sleep time will be a random float from range
 413                        [sleep_interval; max_sleep_interval].
 414     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 415     listformats:       Print an overview of available video formats and exit.
 416     list_thumbnails:   Print a table of all thumbnails and exit.
 417     match_filter:      A function that gets called with the info_dict of
 418                        every video.
 419                        If it returns a message, the video is ignored.
 420                        If it returns None, the video is downloaded.
 421                        match_filter_func in utils.py is one example for this.
 422     no_color:          Do not emit color codes in output.
 423     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 424                        HTTP header
 425     geo_bypass_country:
 426                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 427                        explicit geographic restriction bypassing via faking
 428                        X-Forwarded-For HTTP header
 429     geo_bypass_ip_block:
 430                        IP range in CIDR notation that will be used similarly to
 431                        geo_bypass_country
 432
 433     The following options determine which downloader is picked:
 434     external_downloader: A dictionary of protocol keys and the executable of the
 435                        external downloader to use for it. The allowed protocols
 436                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 437                        Set the value to 'native' to use the native downloader
 438     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 439                        or {'m3u8': 'ffmpeg'} instead.
 440                        Use the native HLS downloader instead of ffmpeg/avconv
 441                        if True, otherwise use ffmpeg/avconv if False, otherwise
 442                        use downloader suggested by extractor if None.
 443     compat_opts:       Compatibility options. See "Differences in default behavior".
 444                        The following options do not work when used through the API:
 445                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 446                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 447                        Refer to __init__.py for their implementation
 448     progress_template: Dictionary of templates for progress outputs.
 449                        Allowed keys are 'download', 'postprocess',
 450                        'download-title' (console title) and 'postprocess-title'.
 451                        The template is mapped on a dictionary with keys 'progress' and 'info'
 452
 453     The following parameters are not used by YoutubeDL itself, they are used by
 454     the downloader (see yt_dlp/downloader/common.py):
 455     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 456     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 457     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 458     external_downloader_args, concurrent_fragment_downloads.
 459
 460     The following options are used by the post processors:
 461     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 462                        otherwise prefer ffmpeg. (avconv support is deprecated)
 463     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 464                        to the binary or its containing directory.
 465     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 466                        and a list of additional command-line arguments for the
 467                        postprocessor/executable. The dict can also have "PP+EXE" keys
 468                        which are used when the given exe is used by the given PP.
 469                        Use 'default' as the name for arguments to be passed to all PPs
 470                        For compatibility with youtube-dl, a single list of args
 471                        can also be used
 472
 473     The following options are used by the extractors:
 474     extractor_retries: Number of times to retry for known errors
 475     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 476     hls_split_discontinuity: Split HLS playlists to different formats at
 477                        discontinuities such as ad breaks (default: False)
 478     extractor_args:    A dictionary of arguments to be passed to the extractors.
 479                        See "EXTRACTOR ARGUMENTS" for details.
 480                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 481     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 482                        If True (default), DASH manifests and related
 483                        data will be downloaded and processed by extractor.
 484                        You can reduce network I/O by disabling it if you don't
 485                        care about DASH. (only for youtube)
 486     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 487                        If True (default), HLS manifests and related
 488                        data will be downloaded and processed by extractor.
 489                        You can reduce network I/O by disabling it if you don't
 490                        care about HLS. (only for youtube)
 491     """
 492
 493     _NUMERIC_FIELDS = set((  # presumably the info_dict keys expected to hold numeric values; consumers are outside this view - TODO confirm
 494         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 495         'timestamp', 'release_timestamp',
 496         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 497         'average_rating', 'comment_count', 'age_limit',
 498         'start_time', 'end_time',
 499         'chapter_number', 'season_number', 'episode_number',
 500         'track_number', 'disc_number', 'release_year',
 501     ))
 502
 503     _format_selection_exts = {  # file extensions grouped by media kind; presumably consulted during format selection - usage not visible here
 504         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 505         'video': {'mp4', 'flv', 'webm', '3gp'},
 506         'storyboards': {'mhtml'},
 507     }
 508
 509     params = None  # class-level default; __init__ stores the options dict here
 510     _ies = {}  # class-level default; __init__ rebinds a fresh dict on every instance
 511     _pps = {k: [] for k in POSTPROCESS_WHEN}  # one empty list per POSTPROCESS_WHEN key; __init__ rebuilds it per instance
 512     _printed_messages = set()  # class-level default; __init__ rebinds a fresh set on every instance
 513     _first_webpage_request = True  # __init__ sets this to True again on every instance
 514     _download_retcode = None  # __init__ sets this to 0
 515     _num_downloads = None  # __init__ sets this to 0
 516     _playlist_level = 0  # NOTE(review): not assigned in the visible part of __init__
 517     _playlist_urls = set()  # NOTE(review): mutable class-level set, not rebound in the visible part of __init__ - confirm it is not shared between instances
 518     _screen_file = None  # __init__ picks sys.stdout or sys.stderr from params.get('logtostderr', False)
 519
 520     def __init__(self, params=None, auto_init=True):
 521         """Create a YoutubeDL object with the given options.
 522         @param auto_init    Whether to load the default extractors and print header (if verbose).
 523                             Set to 'no_verbose_header' to not print the header
 524         """
 525         if params is None:
 526             params = {}
 527         self._ies = {}
 528         self._ies_instances = {}
 529         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 530         self._printed_messages = set()
 531         self._first_webpage_request = True
 532         self._post_hooks = []
 533         self._progress_hooks = []
 534         self._postprocessor_hooks = []
 535         self._download_retcode = 0
 536         self._num_downloads = 0
 537         self._num_videos = 0
 538         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 539         self._err_file = sys.stderr
 540         self.params = params
 541         self.cache = Cache(self)
 542
 543         windows_enable_vt_mode()
 544         self._allow_colors = {
 545             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 546             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 547         }
 548
 549         if sys.version_info < (3, 6):
 550             self.report_warning(
 551                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 552
 553         if self.params.get('allow_unplayable_formats'):
 554             self.report_warning(
 555                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 556                 'This is a developer option intended for debugging. \n'
 557                 '         If you experience any issues while using this option, '
 558                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 559
 560         def check_deprecated(param, option, suggestion):
 561             if self.params.get(param) is not None:
 562                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 563                 return True
 564             return False
 565
 566         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 567             if self.params.get('geo_verification_proxy') is None:
 568                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 569
 570         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 571         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 572         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 573
 574         for msg in self.params.get('_warnings', []):
 575             self.report_warning(msg)
 576         for msg in self.params.get('_deprecation_warnings', []):
 577             self.deprecation_warning(msg)
 578
 579         if 'list-formats' in self.params.get('compat_opts', []):
 580             self.params['listformats_table'] = False
 581
 582         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 583             # nooverwrites was unnecessarily changed to overwrites
 584             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 585             # This ensures compatibility with both keys
 586             self.params['overwrites'] = not self.params['nooverwrites']
 587         elif self.params.get('overwrites') is None:
 588             self.params.pop('overwrites', None)
 589         else:
 590             self.params['nooverwrites'] = not self.params['overwrites']
 591
 592         # Compatibility with older syntax
 593         params.setdefault('forceprint', {})
 594         if not isinstance(params['forceprint'], dict):
 595             params['forceprint'] = {'video': params['forceprint']}
 596
 597         if params.get('bidi_workaround', False):
 598             try:
 599                 import pty
 600                 master, slave = pty.openpty()
 601                 width = compat_get_terminal_size().columns
 602                 if width is None:
 603                     width_args = []
 604                 else:
 605                     width_args = ['-w', str(width)]
 606                 sp_kwargs = dict(
 607                     stdin=subprocess.PIPE,
 608                     stdout=slave,
 609                     stderr=self._err_file)
 610                 try:
 611                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 612                 except OSError:
 613                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 614                 self._output_channel = os.fdopen(master, 'rb')
 615             except OSError as ose:
 616                 if ose.errno == errno.ENOENT:
 617                     self.report_warning(
 618                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 619                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 620                 else:
 621                     raise
 622
 623         if (sys.platform != 'win32'
 624                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 625                 and not params.get('restrictfilenames', False)):
 626             # Unicode filesystem API will throw errors (#1474, #13027)
 627             self.report_warning(
 628                 'Assuming --restrict-filenames since file system encoding '
 629                 'cannot encode all characters. '
 630                 'Set the LC_ALL environment variable to fix this.')
 631             self.params['restrictfilenames'] = True
 632
 633         self.outtmpl_dict = self.parse_outtmpl()
 634
 635         # Creating format selector here allows us to catch syntax errors before the extraction
 636         self.format_selector = (
 637             self.params.get('format') if self.params.get('format') in (None, '-')
 638             else self.params['format'] if callable(self.params['format'])
 639             else self.build_format_selector(self.params['format']))
 640
 641         self._setup_opener()
 642
 643         if auto_init:
 644             if auto_init != 'no_verbose_header':
 645                 self.print_debug_header()
 646             self.add_default_info_extractors()
 647
 648         hooks = {
 649             'post_hooks': self.add_post_hook,
 650             'progress_hooks': self.add_progress_hook,
 651             'postprocessor_hooks': self.add_postprocessor_hook,
 652         }
 653         for opt, fn in hooks.items():
 654             for ph in self.params.get(opt, []):
 655                 fn(ph)
 656
 657         for pp_def_raw in self.params.get('postprocessors', []):
 658             pp_def = dict(pp_def_raw)
 659             when = pp_def.pop('when', 'post_process')
 660             self.add_post_processor(
 661                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 662                 when=when)
 663
 664         register_socks_protocols()
 665
 666         def preload_download_archive(fn):
 667             """Preload the archive, if any is specified"""
 668             if fn is None:
 669                 return False
 670             self.write_debug(f'Loading archive file {fn!r}')
 671             try:
 672                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 673                     for line in archive_file:
 674                         self.archive.add(line.strip())
 675             except IOError as ioe:
 676                 if ioe.errno != errno.ENOENT:
 677                     raise
 678                 return False
 679             return True
 680
 681         self.archive = set()
 682         preload_download_archive(self.params.get('download_archive'))
 683
 684     def warn_if_short_id(self, argv):
 685         # short YouTube ID starting with dash?
 686         idxs = [
 687             i for i, a in enumerate(argv)
 688             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 689         if idxs:
 690             correct_argv = (
 691                 ['yt-dlp']
 692                 + [a for i, a in enumerate(argv) if i not in idxs]
 693                 + ['--'] + [argv[i] for i in idxs]
 694             )
 695             self.report_warning(
 696                 'Long argument string detected. '
 697                 'Use -- to separate parameters and URLs, like this:\n%s' %
 698                 args_to_str(correct_argv))
 699
 700     def add_info_extractor(self, ie):
 701         """Add an InfoExtractor object to the end of the list."""
 702         ie_key = ie.ie_key()
 703         self._ies[ie_key] = ie
 704         if not isinstance(ie, type):
 705             self._ies_instances[ie_key] = ie
 706             ie.set_downloader(self)
 707
 708     def _get_info_extractor_class(self, ie_key):
 709         ie = self._ies.get(ie_key)
 710         if ie is None:
 711             ie = get_info_extractor(ie_key)
 712             self.add_info_extractor(ie)
 713         return ie
 714
 715     def get_info_extractor(self, ie_key):
 716         """
 717         Get an instance of an IE with name ie_key, it will try to get one from
 718         the _ies list, if there's no instance it will create a new one and add
 719         it to the extractor list.
 720         """
 721         ie = self._ies_instances.get(ie_key)
 722         if ie is None:
 723             ie = get_info_extractor(ie_key)()
 724             self.add_info_extractor(ie)
 725         return ie
 726
 727     def add_default_info_extractors(self):
 728         """
 729         Add the InfoExtractors returned by gen_extractors to the end of the list
 730         """
 731         for ie in gen_extractor_classes():
 732             self.add_info_extractor(ie)
 733
 734     def add_post_processor(self, pp, when='post_process'):
 735         """Add a PostProcessor object to the end of the chain."""
 736         self._pps[when].append(pp)
 737         pp.set_downloader(self)
 738
 739     def add_post_hook(self, ph):
 740         """Add the post hook"""
 741         self._post_hooks.append(ph)
 742
 743     def add_progress_hook(self, ph):
 744         """Add the download progress hook"""
 745         self._progress_hooks.append(ph)
 746
 747     def add_postprocessor_hook(self, ph):
 748         """Add the postprocessing progress hook"""
 749         self._postprocessor_hooks.append(ph)
 750         for pps in self._pps.values():
 751             for pp in pps:
 752                 pp.add_progress_hook(ph)
 753
 754     def _bidi_workaround(self, message):
 755         if not hasattr(self, '_output_channel'):
 756             return message
 757
 758         assert hasattr(self, '_output_process')
 759         assert isinstance(message, compat_str)
 760         line_count = message.count('\n') + 1
 761         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 762         self._output_process.stdin.flush()
 763         res = ''.join(self._output_channel.readline().decode('utf-8')
 764                       for _ in range(line_count))
 765         return res[:-len('\n')]
 766
 767     def _write_string(self, message, out=None, only_once=False):
 768         if only_once:
 769             if message in self._printed_messages:
 770                 return
 771             self._printed_messages.add(message)
 772         write_string(message, out=out, encoding=self.params.get('encoding'))
 773
 774     def to_stdout(self, message, skip_eol=False, quiet=False):
 775         """Print message to stdout"""
 776         if self.params.get('logger'):
 777             self.params['logger'].debug(message)
 778         elif not quiet or self.params.get('verbose'):
 779             self._write_string(
 780                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 781                 self._err_file if quiet else self._screen_file)
 782
 783     def to_stderr(self, message, only_once=False):
 784         """Print message to stderr"""
 785         assert isinstance(message, compat_str)
 786         if self.params.get('logger'):
 787             self.params['logger'].error(message)
 788         else:
 789             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 790
 791     def to_console_title(self, message):
 792         if not self.params.get('consoletitle', False):
 793             return
 794         message = remove_terminal_sequences(message)
 795         if compat_os_name == 'nt':
 796             if ctypes.windll.kernel32.GetConsoleWindow():
 797                 # c_wchar_p() might not be necessary if `message` is
 798                 # already of type unicode()
 799                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 800         elif 'TERM' in os.environ:
 801             self._write_string('\033]0;%s\007' % message, self._screen_file)
 802
 803     def save_console_title(self):
 804         if not self.params.get('consoletitle', False):
 805             return
 806         if self.params.get('simulate'):
 807             return
 808         if compat_os_name != 'nt' and 'TERM' in os.environ:
 809             # Save the title on stack
 810             self._write_string('\033[22;0t', self._screen_file)
 811
 812     def restore_console_title(self):
 813         if not self.params.get('consoletitle', False):
 814             return
 815         if self.params.get('simulate'):
 816             return
 817         if compat_os_name != 'nt' and 'TERM' in os.environ:
 818             # Restore the title from stack
 819             self._write_string('\033[23;0t', self._screen_file)
 820
 821     def __enter__(self):
 822         self.save_console_title()
 823         return self
 824
 825     def __exit__(self, *args):
 826         self.restore_console_title()
 827
 828         if self.params.get('cookiefile') is not None:
 829             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 830
 831     def trouble(self, message=None, tb=None, is_error=True):
 832         """Determine action to take when a download problem appears.
 833
 834         Depending on if the downloader has been configured to ignore
 835         download errors or not, this method may throw an exception or
 836         not when errors are found, after printing the message.
 837
 838         @param tb          If given, is additional traceback information
 839         @param is_error    Whether to raise error according to ignorerrors
 840         """
 841         if message is not None:
 842             self.to_stderr(message)
 843         if self.params.get('verbose'):
 844             if tb is None:
 845                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 846                     tb = ''
 847                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 848                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 849                     tb += encode_compat_str(traceback.format_exc())
 850                 else:
 851                     tb_data = traceback.format_list(traceback.extract_stack())
 852                     tb = ''.join(tb_data)
 853             if tb:
 854                 self.to_stderr(tb)
 855         if not is_error:
 856             return
 857         if not self.params.get('ignoreerrors'):
 858             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 859                 exc_info = sys.exc_info()[1].exc_info
 860             else:
 861                 exc_info = sys.exc_info()
 862             raise DownloadError(message, exc_info)
 863         self._download_retcode = 1
 864
 865     def to_screen(self, message, skip_eol=False):
 866         """Print message to stdout if not in quiet mode"""
 867         self.to_stdout(
 868             message, skip_eol, quiet=self.params.get('quiet', False))
 869
    class Styles(Enum):
        """Named text styles; _format_text() passes a member's value on as the style string."""
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        # Same value as HEADERS, so Enum makes WARNING an alias of HEADERS
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 878
 879     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 880         if test_encoding:
 881             original_text = text
 882             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 883             text = text.encode(encoding, 'ignore').decode(encoding)
 884             if fallback is not None and text != original_text:
 885                 text = fallback
 886         if isinstance(f, self.Styles):
 887             f = f.value
 888         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 889
 890     def _format_screen(self, *args, **kwargs):
 891         return self._format_text(
 892             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 893
 894     def _format_err(self, *args, **kwargs):
 895         return self._format_text(
 896             self._err_file, self._allow_colors['err'], *args, **kwargs)
 897
 898     def report_warning(self, message, only_once=False):
 899         '''
 900         Print the message to stderr, it will be prefixed with 'WARNING:'
 901         If stderr is a tty file the 'WARNING:' will be colored
 902         '''
 903         if self.params.get('logger') is not None:
 904             self.params['logger'].warning(message)
 905         else:
 906             if self.params.get('no_warnings'):
 907                 return
 908             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 909
 910     def deprecation_warning(self, message):
 911         if self.params.get('logger') is not None:
 912             self.params['logger'].warning('DeprecationWarning: {message}')
 913         else:
 914             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 915
 916     def report_error(self, message, *args, **kwargs):
 917         '''
 918         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 919         in red if stderr is a tty file.
 920         '''
 921         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 922
 923     def write_debug(self, message, only_once=False):
 924         '''Log debug message or Print message to stderr'''
 925         if not self.params.get('verbose', False):
 926             return
 927         message = '[debug] %s' % message
 928         if self.params.get('logger'):
 929             self.params['logger'].debug(message)
 930         else:
 931             self.to_stderr(message, only_once)
 932
 933     def report_file_already_downloaded(self, file_name):
 934         """Report file has already been fully downloaded."""
 935         try:
 936             self.to_screen('[download] %s has already been downloaded' % file_name)
 937         except UnicodeEncodeError:
 938             self.to_screen('[download] The file has already been downloaded')
 939
 940     def report_file_delete(self, file_name):
 941         """Report that existing file will be deleted."""
 942         try:
 943             self.to_screen('Deleting existing file %s' % file_name)
 944         except UnicodeEncodeError:
 945             self.to_screen('Deleting existing file')
 946
 947     def raise_no_formats(self, info, forced=False):
 948         has_drm = info.get('__has_drm')
 949         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 950         expected = self.params.get('ignore_no_formats_error')
 951         if forced or not expected:
 952             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 953                                  expected=has_drm or expected)
 954         else:
 955             self.report_warning(msg)
 956
 957     def parse_outtmpl(self):
 958         outtmpl_dict = self.params.get('outtmpl', {})
 959         if not isinstance(outtmpl_dict, dict):
 960             outtmpl_dict = {'default': outtmpl_dict}
 961         # Remove spaces in the default template
 962         if self.params.get('restrictfilenames'):
 963             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 964         else:
 965             sanitize = lambda x: x
 966         outtmpl_dict.update({
 967             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 968             if outtmpl_dict.get(k) is None})
 969         for key, val in outtmpl_dict.items():
 970             if isinstance(val, bytes):
 971                 self.report_warning(
 972                     'Parameter outtmpl is bytes, but should be a unicode string. '
 973                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 974         return outtmpl_dict
 975
 976     def get_output_path(self, dir_type='', filename=None):
 977         paths = self.params.get('paths', {})
 978         assert isinstance(paths, dict)
 979         path = os.path.join(
 980             expand_path(paths.get('home', '').strip()),
 981             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 982             filename or '')
 983
 984         # Temporary fix for #4787
 985         # 'Treat' all problem characters by passing filename through preferredencoding
 986         # to workaround encoding issues with subprocess on python2 @ Windows
 987         if sys.version_info < (3, 0) and sys.platform == 'win32':
 988             path = encodeFilename(path, True).decode(preferredencoding())
 989         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 990
 991     @staticmethod
 992     def _outtmpl_expandpath(outtmpl):
 993         # expand_path translates '%%' into '%' and '$$' into '$'
 994         # correspondingly that is not what we want since we need to keep
 995         # '%%' intact for template dict substitution step. Working around
 996         # with boundary-alike separator hack.
 997         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 998         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 999
1000         # outtmpl should be expand_path'ed before template dict substitution
1001         # because meta fields may contain env variables we don't want to
1002         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1003         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1004         return expand_path(outtmpl).replace(sep, '')
1005
1006     @staticmethod
1007     def escape_outtmpl(outtmpl):
1008         ''' Escape any remaining strings like %s, %abc% etc. '''
1009         return re.sub(
1010             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1011             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1012             outtmpl)
1013
1014     @classmethod
1015     def validate_outtmpl(cls, outtmpl):
1016         ''' @return None or Exception object '''
1017         outtmpl = re.sub(
1018             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1019             lambda mobj: f'{mobj.group(0)[:-1]}s',
1020             cls._outtmpl_expandpath(outtmpl))
1021         try:
1022             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1023             return None
1024         except ValueError as err:
1025             return err
1026
1027     @staticmethod
1028     def _copy_infodict(info_dict):
1029         info_dict = dict(info_dict)
1030         for key in ('__original_infodict', '__postprocessors'):
1031             info_dict.pop(key, None)
1032         return info_dict
1033
1034     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1035         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1036         @param sanitize    Whether to sanitize the output as a filename.
1037                            For backward compatibility, a function can also be passed
1038         """
1039
1040         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1041
1042         info_dict = self._copy_infodict(info_dict)
1043         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1044             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1045             if info_dict.get('duration', None) is not None
1046             else None)
1047         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
1048         info_dict['video_autonumber'] = self._num_videos
1049         if info_dict.get('resolution') is None:
1050             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1051
1052         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1053         # of %(field)s to %(field)0Nd for backward compatibility
1054         field_size_compat_map = {
1055             'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1056             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1057             'autonumber': self.params.get('autonumber_size') or 5,
1058         }
1059
1060         TMPL_DICT = {}
1061         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
1062         MATH_FUNCTIONS = {
1063             '+': float.__add__,
1064             '-': float.__sub__,
1065         }
1066         # Field is of the form key1.key2...
1067         # where keys (except first) can be string, int or slice
1068         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
1069         MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
1070         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1071         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1072             (?P<negate>-)?
1073             (?P<fields>{field})
1074             (?P<maths>(?:{math_op}{math_field})*)
1075             (?:>(?P<strf_format>.+?))?
1076             (?P<alternate>(?<!\\),[^|&)]+)?
1077             (?:&(?P<replacement>.*?))?
1078             (?:\|(?P<default>.*?))?
1079             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
1080
1081         def _traverse_infodict(k):
1082             k = k.split('.')
1083             if k[0] == '':
1084                 k.pop(0)
1085             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
1086
1087         def get_value(mdict):
1088             # Object traversal
1089             value = _traverse_infodict(mdict['fields'])
1090             # Negative
1091             if mdict['negate']:
1092                 value = float_or_none(value)
1093                 if value is not None:
1094                     value *= -1
1095             # Do maths
1096             offset_key = mdict['maths']
1097             if offset_key:
1098                 value = float_or_none(value)
1099                 operator = None
1100                 while offset_key:
1101                     item = re.match(
1102                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1103                         offset_key).group(0)
1104                     offset_key = offset_key[len(item):]
1105                     if operator is None:
1106                         operator = MATH_FUNCTIONS[item]
1107                         continue
1108                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1109                     offset = float_or_none(item)
1110                     if offset is None:
1111                         offset = float_or_none(_traverse_infodict(item))
1112                     try:
1113                         value = operator(value, multiplier * offset)
1114                     except (TypeError, ZeroDivisionError):
1115                         return None
1116                     operator = None
1117             # Datetime formatting
1118             if mdict['strf_format']:
1119                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1120
1121             return value
1122
1123         na = self.params.get('outtmpl_na_placeholder', 'NA')
1124
1125         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1126             return sanitize_filename(str(value), restricted=restricted,
1127                                      is_id=re.search(r'(^|[_.])id(\.|$)', key))
1128
1129         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1130         sanitize = bool(sanitize)
1131
1132         def _dumpjson_default(obj):
1133             if isinstance(obj, (set, LazyList)):
1134                 return list(obj)
1135             return repr(obj)
1136
1137         def create_key(outer_mobj):
1138             if not outer_mobj.group('has_key'):
1139                 return outer_mobj.group(0)
1140             key = outer_mobj.group('key')
1141             mobj = re.match(INTERNAL_FORMAT_RE, key)
1142             initial_field = mobj.group('fields') if mobj else ''
1143             value, replacement, default = None, None, na
1144             while mobj:
1145                 mobj = mobj.groupdict()
1146                 default = mobj['default'] if mobj['default'] is not None else default
1147                 value = get_value(mobj)
1148                 replacement = mobj['replacement']
1149                 if value is None and mobj['alternate']:
1150                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1151                 else:
1152                     break
1153
1154             fmt = outer_mobj.group('format')
1155             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1156                 fmt = '0{:d}d'.format(field_size_compat_map[key])
1157
1158             value = default if value is None else value if replacement is None else replacement
1159
1160             flags = outer_mobj.group('conversion') or ''
1161             str_fmt = f'{fmt[:-1]}s'
1162             if fmt[-1] == 'l':  # list
1163                 delim = '\n' if '#' in flags else ', '
1164                 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1165             elif fmt[-1] == 'j':  # json
1166                 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
1167             elif fmt[-1] == 'q':  # quoted
1168                 value = map(str, variadic(value) if '#' in flags else [value])
1169                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1170             elif fmt[-1] == 'B':  # bytes
1171                 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1172                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1173             elif fmt[-1] == 'U':  # unicode normalized
1174                 value, fmt = unicodedata.normalize(
1175                     # "+" = compatibility equivalence, "#" = NFD
1176                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1177                     value), str_fmt
1178             elif fmt[-1] == 'D':  # decimal suffix
1179                 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1180                 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1181                                               factor=1024 if '#' in flags else 1000)
1182             elif fmt[-1] == 'S':  # filename sanitization
1183                 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
1184             elif fmt[-1] == 'c':
1185                 if value:
1186                     value = str(value)[0]
1187                 else:
1188                     fmt = str_fmt
1189             elif fmt[-1] not in 'rs':  # numeric
1190                 value = float_or_none(value)
1191                 if value is None:
1192                     value, fmt = default, 's'
1193
1194             if sanitize:
1195                 if fmt[-1] == 'r':
1196                     # If value is an object, sanitize might convert it to a string
1197                     # So we convert it to repr first
1198                     value, fmt = repr(value), str_fmt
1199                 if fmt[-1] in 'csr':
1200                     value = sanitizer(initial_field, value)
1201
1202             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1203             TMPL_DICT[key] = value
1204             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1205
1206         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1207
1208     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1209         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1210         return self.escape_outtmpl(outtmpl) % info_dict
1211
1212     def _prepare_filename(self, info_dict, tmpl_type='default'):
1213         try:
1214             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1215             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1216
1217             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1218             if filename and force_ext is not None:
1219                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1220
1221             # https://github.com/blackjack4494/youtube-dlc/issues/85
1222             trim_file_name = self.params.get('trim_file_name', False)
1223             if trim_file_name:
1224                 no_ext, *ext = filename.rsplit('.', 2)
1225                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1226
1227             return filename
1228         except ValueError as err:
1229             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1230             return None
1231
1232     def prepare_filename(self, info_dict, dir_type='', warn=False):
1233         """Generate the output filename."""
1234
1235         filename = self._prepare_filename(info_dict, dir_type or 'default')
1236         if not filename and dir_type not in ('', 'temp'):
1237             return ''
1238
1239         if warn:
1240             if not self.params.get('paths'):
1241                 pass
1242             elif filename == '-':
1243                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1244             elif os.path.isabs(filename):
1245                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1246         if filename == '-' or not filename:
1247             return filename
1248
1249         return self.get_output_path(dir_type, filename)
1250
1251     def _match_entry(self, info_dict, incomplete=False, silent=False):
1252         """ Returns None if the file should be downloaded """
1253
1254         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1255
1256         def check_filter():
1257             if 'title' in info_dict:
1258                 # This can happen when we're just evaluating the playlist
1259                 title = info_dict['title']
1260                 matchtitle = self.params.get('matchtitle', False)
1261                 if matchtitle:
1262                     if not re.search(matchtitle, title, re.IGNORECASE):
1263                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1264                 rejecttitle = self.params.get('rejecttitle', False)
1265                 if rejecttitle:
1266                     if re.search(rejecttitle, title, re.IGNORECASE):
1267                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1268             date = info_dict.get('upload_date')
1269             if date is not None:
1270                 dateRange = self.params.get('daterange', DateRange())
1271                 if date not in dateRange:
1272                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1273             view_count = info_dict.get('view_count')
1274             if view_count is not None:
1275                 min_views = self.params.get('min_views')
1276                 if min_views is not None and view_count < min_views:
1277                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1278                 max_views = self.params.get('max_views')
1279                 if max_views is not None and view_count > max_views:
1280                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1281             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1282                 return 'Skipping "%s" because it is age restricted' % video_title
1283
1284             match_filter = self.params.get('match_filter')
1285             if match_filter is not None:
1286                 try:
1287                     ret = match_filter(info_dict, incomplete=incomplete)
1288                 except TypeError:
1289                     # For backward compatibility
1290                     ret = None if incomplete else match_filter(info_dict)
1291                 if ret is not None:
1292                     return ret
1293             return None
1294
1295         if self.in_download_archive(info_dict):
1296             reason = '%s has already been recorded in the archive' % video_title
1297             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1298         else:
1299             reason = check_filter()
1300             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1301         if reason is not None:
1302             if not silent:
1303                 self.to_screen('[download] ' + reason)
1304             if self.params.get(break_opt, False):
1305                 raise break_err()
1306         return reason
1307
1308     @staticmethod
1309     def add_extra_info(info_dict, extra_info):
1310         '''Set the keys from extra_info in info dict if they are missing'''
1311         for key, value in extra_info.items():
1312             info_dict.setdefault(key, value)
1313
1314     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1315                      process=True, force_generic_extractor=False):
1316         """
1317         Return a list with a dictionary for each video extracted.
1318
1319         Arguments:
1320         url -- URL to extract
1321
1322         Keyword arguments:
1323         download -- whether to download videos during extraction
1324         ie_key -- extractor key hint
1325         extra_info -- dictionary containing the extra values to add to each result
1326         process -- whether to resolve all unresolved references (URLs, playlist items),
1327             must be True for download to work.
1328         force_generic_extractor -- force using the generic extractor
1329         """
1330
1331         if extra_info is None:
1332             extra_info = {}
1333
1334         if not ie_key and force_generic_extractor:
1335             ie_key = 'Generic'
1336
1337         if ie_key:
1338             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1339         else:
1340             ies = self._ies
1341
1342         for ie_key, ie in ies.items():
1343             if not ie.suitable(url):
1344                 continue
1345
1346             if not ie.working():
1347                 self.report_warning('The program functionality for this site has been marked as broken, '
1348                                     'and will probably not work.')
1349
1350             temp_id = ie.get_temp_id(url)
1351             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1352                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1353                 if self.params.get('break_on_existing', False):
1354                     raise ExistingVideoReached()
1355                 break
1356             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1357         else:
1358             self.report_error('no suitable InfoExtractor for URL %s' % url)
1359
    def __handle_extraction_exceptions(func):
        """Decorator adding the common extraction error handling to a method.

        The wrapped method is retried for as long as it raises ReExtractInfo.
        Download cancellations and LazyList/PagedList index errors propagate
        unchanged. Geo-restriction and extractor errors are passed to
        report_error; any other exception is reported only when the
        'ignoreerrors' param is set and re-raised otherwise. When a reporting
        handler returns normally, the wrapper returns None.
        """
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            # Loop so that a ReExtractInfo can restart the call from scratch
            while True:
                try:
                    return func(self, *args, **kwargs)
                except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                    # Not handled here; the caller is expected to deal with these
                    raise
                except ReExtractInfo as e:
                    if e.expected:
                        self.to_screen(f'{e}; Re-extracting data')
                    else:
                        self.to_stderr('\r')
                        self.report_warning(f'{e}; Re-extracting data')
                    # Run func again with the same arguments
                    continue
                except GeoRestrictedError as e:
                    msg = e.msg
                    if e.countries:
                        msg += '\nThis video is available in %s.' % ', '.join(
                            map(ISO3166Utils.short2full, e.countries))
                    msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                    self.report_error(msg)
                except ExtractorError as e:  # An error we somewhat expected
                    self.report_error(str(e), e.format_traceback())
                except Exception as e:
                    if self.params.get('ignoreerrors'):
                        self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
                    else:
                        raise
                # Reached only after a reporting handler above returned normally;
                # leaving the loop makes the wrapper return None
                break
        return wrapper
1391
1392     def _wait_for_video(self, ie_result):
1393         if (not self.params.get('wait_for_video')
1394                 or ie_result.get('_type', 'video') != 'video'
1395                 or ie_result.get('formats') or ie_result.get('url')):
1396             return
1397
1398         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1399         last_msg = ''
1400
1401         def progress(msg):
1402             nonlocal last_msg
1403             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1404             last_msg = msg
1405
1406         min_wait, max_wait = self.params.get('wait_for_video')
1407         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1408         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1409             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1410             self.report_warning('Release time of video is not known')
1411         elif (diff or 0) <= 0:
1412             self.report_warning('Video should already be available according to extracted info')
1413         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1414         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1415
1416         wait_till = time.time() + diff
1417         try:
1418             while True:
1419                 diff = wait_till - time.time()
1420                 if diff <= 0:
1421                     progress('')
1422                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1423                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1424                 time.sleep(1)
1425         except KeyboardInterrupt:
1426             progress('')
1427             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1428         except BaseException as e:
1429             if not isinstance(e, ReExtractInfo):
1430                 self.to_screen('')
1431             raise
1432
1433     @__handle_extraction_exceptions
1434     def __extract_info(self, url, ie, download, extra_info, process):
1435         ie_result = ie.extract(url)
1436         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1437             return
1438         if isinstance(ie_result, list):
1439             # Backwards compatibility: old IE result format
1440             ie_result = {
1441                 '_type': 'compat_list',
1442                 'entries': ie_result,
1443             }
1444         if extra_info.get('original_url'):
1445             ie_result.setdefault('original_url', extra_info['original_url'])
1446         self.add_default_extra_info(ie_result, ie, url)
1447         if process:
1448             self._wait_for_video(ie_result)
1449             return self.process_ie_result(ie_result, download, extra_info)
1450         else:
1451             return ie_result
1452
1453     def add_default_extra_info(self, ie_result, ie, url):
1454         if url is not None:
1455             self.add_extra_info(ie_result, {
1456                 'webpage_url': url,
1457                 'original_url': url,
1458                 'webpage_url_basename': url_basename(url),
1459                 'webpage_url_domain': get_domain(url),
1460             })
1461         if ie is not None:
1462             self.add_extra_info(ie_result, {
1463                 'extractor': ie.IE_NAME,
1464                 'extractor_key': ie.ie_key(),
1465             })
1466
1467     def process_ie_result(self, ie_result, download=True, extra_info=None):
1468         """
1469         Take the result of the ie(may be modified) and resolve all unresolved
1470         references (URLs, playlist items).
1471
1472         It will also download the videos if 'download'.
1473         Returns the resolved ie_result.
1474         """
1475         if extra_info is None:
1476             extra_info = {}
1477         result_type = ie_result.get('_type', 'video')
1478
1479         if result_type in ('url', 'url_transparent'):
1480             ie_result['url'] = sanitize_url(ie_result['url'])
1481             if ie_result.get('original_url'):
1482                 extra_info.setdefault('original_url', ie_result['original_url'])
1483
1484             extract_flat = self.params.get('extract_flat', False)
1485             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1486                     or extract_flat is True):
1487                 info_copy = ie_result.copy()
1488                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1489                 if ie and not ie_result.get('id'):
1490                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1491                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1492                 self.add_extra_info(info_copy, extra_info)
1493                 info_copy, _ = self.pre_process(info_copy)
1494                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1495                 if self.params.get('force_write_download_archive', False):
1496                     self.record_download_archive(info_copy)
1497                 return ie_result
1498
1499         if result_type == 'video':
1500             self.add_extra_info(ie_result, extra_info)
1501             ie_result = self.process_video_result(ie_result, download=download)
1502             additional_urls = (ie_result or {}).get('additional_urls')
1503             if additional_urls:
1504                 # TODO: Improve MetadataParserPP to allow setting a list
1505                 if isinstance(additional_urls, compat_str):
1506                     additional_urls = [additional_urls]
1507                 self.to_screen(
1508                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1509                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1510                 ie_result['additional_entries'] = [
1511                     self.extract_info(
1512                         url, download, extra_info=extra_info,
1513                         force_generic_extractor=self.params.get('force_generic_extractor'))
1514                     for url in additional_urls
1515                 ]
1516             return ie_result
1517         elif result_type == 'url':
1518             # We have to add extra_info to the results because it may be
1519             # contained in a playlist
1520             return self.extract_info(
1521                 ie_result['url'], download,
1522                 ie_key=ie_result.get('ie_key'),
1523                 extra_info=extra_info)
1524         elif result_type == 'url_transparent':
1525             # Use the information from the embedding page
1526             info = self.extract_info(
1527                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1528                 extra_info=extra_info, download=False, process=False)
1529
1530             # extract_info may return None when ignoreerrors is enabled and
1531             # extraction failed with an error, don't crash and return early
1532             # in this case
1533             if not info:
1534                 return info
1535
1536             force_properties = dict(
1537                 (k, v) for k, v in ie_result.items() if v is not None)
1538             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1539                 if f in force_properties:
1540                     del force_properties[f]
1541             new_result = info.copy()
1542             new_result.update(force_properties)
1543
1544             # Extracted info may not be a video result (i.e.
1545             # info.get('_type', 'video') != video) but rather an url or
1546             # url_transparent. In such cases outer metadata (from ie_result)
1547             # should be propagated to inner one (info). For this to happen
1548             # _type of info should be overridden with url_transparent. This
1549             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1550             if new_result.get('_type') == 'url':
1551                 new_result['_type'] = 'url_transparent'
1552
1553             return self.process_ie_result(
1554                 new_result, download=download, extra_info=extra_info)
1555         elif result_type in ('playlist', 'multi_video'):
1556             # Protect from infinite recursion due to recursively nested playlists
1557             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1558             webpage_url = ie_result['webpage_url']
1559             if webpage_url in self._playlist_urls:
1560                 self.to_screen(
1561                     '[download] Skipping already downloaded playlist: %s'
1562                     % ie_result.get('title') or ie_result.get('id'))
1563                 return
1564
1565             self._playlist_level += 1
1566             self._playlist_urls.add(webpage_url)
1567             self._sanitize_thumbnails(ie_result)
1568             try:
1569                 return self.__process_playlist(ie_result, download)
1570             finally:
1571                 self._playlist_level -= 1
1572                 if not self._playlist_level:
1573                     self._playlist_urls.clear()
1574         elif result_type == 'compat_list':
1575             self.report_warning(
1576                 'Extractor %s returned a compat_list result. '
1577                 'It needs to be updated.' % ie_result.get('extractor'))
1578
1579             def _fixup(r):
1580                 self.add_extra_info(r, {
1581                     'extractor': ie_result['extractor'],
1582                     'webpage_url': ie_result['webpage_url'],
1583                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1584                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1585                     'extractor_key': ie_result['extractor_key'],
1586                 })
1587                 return r
1588             ie_result['entries'] = [
1589                 self.process_ie_result(_fixup(r), download, extra_info)
1590                 for r in ie_result['entries']
1591             ]
1592             return ie_result
1593         else:
1594             raise Exception('Invalid result type: %s' % result_type)
1595
1596     def _ensure_dir_exists(self, path):
1597         return make_dir(path, self.report_error)
1598
1599     def __process_playlist(self, ie_result, download):
1600         # We process each entry in the playlist
1601         playlist = ie_result.get('title') or ie_result.get('id')
1602         self.to_screen('[download] Downloading playlist: %s' % playlist)
1603
1604         if 'entries' not in ie_result:
1605             raise EntryNotInPlaylist('There are no entries')
1606
1607         MissingEntry = object()
1608         incomplete_entries = bool(ie_result.get('requested_entries'))
1609         if incomplete_entries:
1610             def fill_missing_entries(entries, indices):
1611                 ret = [MissingEntry] * max(indices)
1612                 for i, entry in zip(indices, entries):
1613                     ret[i - 1] = entry
1614                 return ret
1615             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1616
1617         playlist_results = []
1618
1619         playliststart = self.params.get('playliststart', 1)
1620         playlistend = self.params.get('playlistend')
1621         # For backwards compatibility, interpret -1 as whole list
1622         if playlistend == -1:
1623             playlistend = None
1624
1625         playlistitems_str = self.params.get('playlist_items')
1626         playlistitems = None
1627         if playlistitems_str is not None:
1628             def iter_playlistitems(format):
1629                 for string_segment in format.split(','):
1630                     if '-' in string_segment:
1631                         start, end = string_segment.split('-')
1632                         for item in range(int(start), int(end) + 1):
1633                             yield int(item)
1634                     else:
1635                         yield int(string_segment)
1636             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1637
1638         ie_entries = ie_result['entries']
1639         if isinstance(ie_entries, list):
1640             playlist_count = len(ie_result)
1641             msg = f'Collected {playlist_count} videos; downloading %d of them'
1642             ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
1643
1644             def get_entry(i):
1645                 return ie_entries[i - 1]
1646         else:
1647             msg = 'Downloading %d videos'
1648             if not isinstance(ie_entries, (PagedList, LazyList)):
1649                 ie_entries = LazyList(ie_entries)
1650
1651             def get_entry(i):
1652                 return YoutubeDL.__handle_extraction_exceptions(
1653                     lambda self, i: ie_entries[i - 1]
1654                 )(self, i)
1655
1656         entries, broken = [], False
1657         items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1658         for i in items:
1659             if i == 0:
1660                 continue
1661             if playlistitems is None and playlistend is not None and playlistend < i:
1662                 break
1663             entry = None
1664             try:
1665                 entry = get_entry(i)
1666                 if entry is MissingEntry:
1667                     raise EntryNotInPlaylist()
1668             except (IndexError, EntryNotInPlaylist):
1669                 if incomplete_entries:
1670                     raise EntryNotInPlaylist(f'Entry {i} cannot be found')
1671                 elif not playlistitems:
1672                     break
1673             entries.append(entry)
1674             try:
1675                 if entry is not None:
1676                     self._match_entry(entry, incomplete=True, silent=True)
1677             except (ExistingVideoReached, RejectedVideoReached):
1678                 broken = True
1679                 break
1680         ie_result['entries'] = entries
1681
1682         # Save playlist_index before re-ordering
1683         entries = [
1684             ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1685             for i, entry in enumerate(entries, 1)
1686             if entry is not None]
1687         n_entries = len(entries)
1688
1689         if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
1690             ie_result['playlist_count'] = n_entries
1691
1692         if not playlistitems and (playliststart != 1 or playlistend):
1693             playlistitems = list(range(playliststart, playliststart + n_entries))
1694         ie_result['requested_entries'] = playlistitems
1695
1696         _infojson_written = False
1697         if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
1698             ie_copy = {
1699                 'playlist': playlist,
1700                 'playlist_id': ie_result.get('id'),
1701                 'playlist_title': ie_result.get('title'),
1702                 'playlist_uploader': ie_result.get('uploader'),
1703                 'playlist_uploader_id': ie_result.get('uploader_id'),
1704                 'playlist_index': 0,
1705                 'n_entries': n_entries,
1706             }
1707             ie_copy.update(dict(ie_result))
1708
1709             _infojson_written = self._write_info_json(
1710                 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1711             if _infojson_written is None:
1712                 return
1713             if self._write_description('playlist', ie_result,
1714                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1715                 return
1716             # TODO: This should be passed to ThumbnailsConvertor if necessary
1717             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1718
1719         if self.params.get('playlistreverse', False):
1720             entries = entries[::-1]
1721         if self.params.get('playlistrandom', False):
1722             random.shuffle(entries)
1723
1724         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1725
1726         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1727         failures = 0
1728         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1729         for i, entry_tuple in enumerate(entries, 1):
1730             playlist_index, entry = entry_tuple
1731             if 'playlist-index' in self.params.get('compat_opts', []):
1732                 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1733             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1734             # This __x_forwarded_for_ip thing is a bit ugly but requires
1735             # minimal changes
1736             if x_forwarded_for:
1737                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1738             extra = {
1739                 'n_entries': n_entries,
1740                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1741                 'playlist_count': ie_result.get('playlist_count'),
1742                 'playlist_index': playlist_index,
1743                 'playlist_autonumber': i,
1744                 'playlist': playlist,
1745                 'playlist_id': ie_result.get('id'),
1746                 'playlist_title': ie_result.get('title'),
1747                 'playlist_uploader': ie_result.get('uploader'),
1748                 'playlist_uploader_id': ie_result.get('uploader_id'),
1749                 'extractor': ie_result['extractor'],
1750                 'webpage_url': ie_result['webpage_url'],
1751                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1752                 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1753                 'extractor_key': ie_result['extractor_key'],
1754             }
1755
1756             if self._match_entry(entry, incomplete=True) is not None:
1757                 continue
1758
1759             entry_result = self.__process_iterable_entry(entry, download, extra)
1760             if not entry_result:
1761                 failures += 1
1762             if failures >= max_failures:
1763                 self.report_error(
1764                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1765                 break
1766             playlist_results.append(entry_result)
1767         ie_result['entries'] = playlist_results
1768
1769         # Write the updated info to json
1770         if _infojson_written and self._write_info_json(
1771                 'updated playlist', ie_result,
1772                 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1773             return
1774
1775         ie_result = self.run_all_pps('playlist', ie_result)
1776         self.to_screen(f'[download] Finished downloading playlist: {playlist}')
1777         return ie_result
1778
1779     @__handle_extraction_exceptions
1780     def __process_iterable_entry(self, entry, download, extra_info):
1781         return self.process_ie_result(
1782             entry, download=download, extra_info=extra_info)
1783
1784     def _build_format_filter(self, filter_spec):
1785         " Returns a function to filter the formats according to the filter_spec "
1786
1787         OPERATORS = {
1788             '<': operator.lt,
1789             '<=': operator.le,
1790             '>': operator.gt,
1791             '>=': operator.ge,
1792             '=': operator.eq,
1793             '!=': operator.ne,
1794         }
1795         operator_rex = re.compile(r'''(?x)\s*
1796             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1797             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1798             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1799             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1800         m = operator_rex.fullmatch(filter_spec)
1801         if m:
1802             try:
1803                 comparison_value = int(m.group('value'))
1804             except ValueError:
1805                 comparison_value = parse_filesize(m.group('value'))
1806                 if comparison_value is None:
1807                     comparison_value = parse_filesize(m.group('value') + 'B')
1808                 if comparison_value is None:
1809                     raise ValueError(
1810                         'Invalid value %r in format specification %r' % (
1811                             m.group('value'), filter_spec))
1812             op = OPERATORS[m.group('op')]
1813
1814         if not m:
1815             STR_OPERATORS = {
1816                 '=': operator.eq,
1817                 '^=': lambda attr, value: attr.startswith(value),
1818                 '$=': lambda attr, value: attr.endswith(value),
1819                 '*=': lambda attr, value: value in attr,
1820             }
1821             str_operator_rex = re.compile(r'''(?x)\s*
1822                 (?P<key>[a-zA-Z0-9._-]+)\s*
1823                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1824                 (?P<value>[a-zA-Z0-9._-]+)\s*
1825                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1826             m = str_operator_rex.fullmatch(filter_spec)
1827             if m:
1828                 comparison_value = m.group('value')
1829                 str_op = STR_OPERATORS[m.group('op')]
1830                 if m.group('negation'):
1831                     op = lambda attr, value: not str_op(attr, value)
1832                 else:
1833                     op = str_op
1834
1835         if not m:
1836             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1837
1838         def _filter(f):
1839             actual_value = f.get(m.group('key'))
1840             if actual_value is None:
1841                 return m.group('none_inclusive')
1842             return op(actual_value, comparison_value)
1843         return _filter
1844
1845     def _check_formats(self, formats):
1846         for f in formats:
1847             self.to_screen('[info] Testing format %s' % f['format_id'])
1848             path = self.get_output_path('temp')
1849             if not self._ensure_dir_exists(f'{path}/'):
1850                 continue
1851             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1852             temp_file.close()
1853             try:
1854                 success, _ = self.dl(temp_file.name, f, test=True)
1855             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1856                 success = False
1857             finally:
1858                 if os.path.exists(temp_file.name):
1859                     try:
1860                         os.remove(temp_file.name)
1861                     except OSError:
1862                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1863             if success:
1864                 yield f
1865             else:
1866                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1867
1868     def _default_format_spec(self, info_dict, download=True):
1869
1870         def can_merge():
1871             merger = FFmpegMergerPP(self)
1872             return merger.available and merger.can_merge()
1873
1874         prefer_best = (
1875             not self.params.get('simulate')
1876             and download
1877             and (
1878                 not can_merge()
1879                 or info_dict.get('is_live', False)
1880                 or self.outtmpl_dict['default'] == '-'))
1881         compat = (
1882             prefer_best
1883             or self.params.get('allow_multiple_audio_streams', False)
1884             or 'format-spec' in self.params.get('compat_opts', []))
1885
1886         return (
1887             'best/bestvideo+bestaudio' if prefer_best
1888             else 'bestvideo*+bestaudio/best' if not compat
1889             else 'bestvideo+bestaudio/best')
1890
1891     def build_format_selector(self, format_spec):
1892         def syntax_error(note, start):
1893             message = (
1894                 'Invalid format specification: '
1895                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1896             return SyntaxError(message)
1897
1898         PICKFIRST = 'PICKFIRST'
1899         MERGE = 'MERGE'
1900         SINGLE = 'SINGLE'
1901         GROUP = 'GROUP'
1902         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1903
1904         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1905                                   'video': self.params.get('allow_multiple_video_streams', False)}
1906
1907         check_formats = self.params.get('check_formats') == 'selected'
1908
1909         def _parse_filter(tokens):
1910             filter_parts = []
1911             for type, string, start, _, _ in tokens:
1912                 if type == tokenize.OP and string == ']':
1913                     return ''.join(filter_parts)
1914                 else:
1915                     filter_parts.append(string)
1916
1917         def _remove_unused_ops(tokens):
1918             # Remove operators that we don't use and join them with the surrounding strings
1919             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1920             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1921             last_string, last_start, last_end, last_line = None, None, None, None
1922             for type, string, start, end, line in tokens:
1923                 if type == tokenize.OP and string == '[':
1924                     if last_string:
1925                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1926                         last_string = None
1927                     yield type, string, start, end, line
1928                     # everything inside brackets will be handled by _parse_filter
1929                     for type, string, start, end, line in tokens:
1930                         yield type, string, start, end, line
1931                         if type == tokenize.OP and string == ']':
1932                             break
1933                 elif type == tokenize.OP and string in ALLOWED_OPS:
1934                     if last_string:
1935                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1936                         last_string = None
1937                     yield type, string, start, end, line
1938                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1939                     if not last_string:
1940                         last_string = string
1941                         last_start = start
1942                         last_end = end
1943                     else:
1944                         last_string += string
1945             if last_string:
1946                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1947
1948         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1949             selectors = []
1950             current_selector = None
1951             for type, string, start, _, _ in tokens:
1952                 # ENCODING is only defined in python 3.x
1953                 if type == getattr(tokenize, 'ENCODING', None):
1954                     continue
1955                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1956                     current_selector = FormatSelector(SINGLE, string, [])
1957                 elif type == tokenize.OP:
1958                     if string == ')':
1959                         if not inside_group:
1960                             # ')' will be handled by the parentheses group
1961                             tokens.restore_last_token()
1962                         break
1963                     elif inside_merge and string in ['/', ',']:
1964                         tokens.restore_last_token()
1965                         break
1966                     elif inside_choice and string == ',':
1967                         tokens.restore_last_token()
1968                         break
1969                     elif string == ',':
1970                         if not current_selector:
1971                             raise syntax_error('"," must follow a format selector', start)
1972                         selectors.append(current_selector)
1973                         current_selector = None
1974                     elif string == '/':
1975                         if not current_selector:
1976                             raise syntax_error('"/" must follow a format selector', start)
1977                         first_choice = current_selector
1978                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1979                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1980                     elif string == '[':
1981                         if not current_selector:
1982                             current_selector = FormatSelector(SINGLE, 'best', [])
1983                         format_filter = _parse_filter(tokens)
1984                         current_selector.filters.append(format_filter)
1985                     elif string == '(':
1986                         if current_selector:
1987                             raise syntax_error('Unexpected "("', start)
1988                         group = _parse_format_selection(tokens, inside_group=True)
1989                         current_selector = FormatSelector(GROUP, group, [])
1990                     elif string == '+':
1991                         if not current_selector:
1992                             raise syntax_error('Unexpected "+"', start)
1993                         selector_1 = current_selector
1994                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1995                         if not selector_2:
1996                             raise syntax_error('Expected a selector', start)
1997                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1998                     else:
1999                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2000                 elif type == tokenize.ENDMARKER:
2001                     break
2002             if current_selector:
2003                 selectors.append(current_selector)
2004             return selectors
2005
2006         def _merge(formats_pair):
2007             format_1, format_2 = formats_pair
2008
2009             formats_info = []
2010             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2011             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2012
2013             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2014                 get_no_more = {'video': False, 'audio': False}
2015                 for (i, fmt_info) in enumerate(formats_info):
2016                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2017                         formats_info.pop(i)
2018                         continue
2019                     for aud_vid in ['audio', 'video']:
2020                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2021                             if get_no_more[aud_vid]:
2022                                 formats_info.pop(i)
2023                                 break
2024                             get_no_more[aud_vid] = True
2025
2026             if len(formats_info) == 1:
2027                 return formats_info[0]
2028
2029             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2030             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2031
2032             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2033             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2034
2035             output_ext = self.params.get('merge_output_format')
2036             if not output_ext:
2037                 if the_only_video:
2038                     output_ext = the_only_video['ext']
2039                 elif the_only_audio and not video_fmts:
2040                     output_ext = the_only_audio['ext']
2041                 else:
2042                     output_ext = 'mkv'
2043
2044             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2045
2046             new_dict = {
2047                 'requested_formats': formats_info,
2048                 'format': '+'.join(filtered('format')),
2049                 'format_id': '+'.join(filtered('format_id')),
2050                 'ext': output_ext,
2051                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2052                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2053                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2054                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2055                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2056             }
2057
2058             if the_only_video:
2059                 new_dict.update({
2060                     'width': the_only_video.get('width'),
2061                     'height': the_only_video.get('height'),
2062                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2063                     'fps': the_only_video.get('fps'),
2064                     'dynamic_range': the_only_video.get('dynamic_range'),
2065                     'vcodec': the_only_video.get('vcodec'),
2066                     'vbr': the_only_video.get('vbr'),
2067                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2068                 })
2069
2070             if the_only_audio:
2071                 new_dict.update({
2072                     'acodec': the_only_audio.get('acodec'),
2073                     'abr': the_only_audio.get('abr'),
2074                     'asr': the_only_audio.get('asr'),
2075                 })
2076
2077             return new_dict
2078
2079         def _check_formats(formats):
2080             if not check_formats:
2081                 yield from formats
2082                 return
2083             yield from self._check_formats(formats)
2084
2085         def _build_selector_function(selector):
2086             if isinstance(selector, list):  # ,
2087                 fs = [_build_selector_function(s) for s in selector]
2088
2089                 def selector_function(ctx):
2090                     for f in fs:
2091                         yield from f(ctx)
2092                 return selector_function
2093
2094             elif selector.type == GROUP:  # ()
2095                 selector_function = _build_selector_function(selector.selector)
2096
2097             elif selector.type == PICKFIRST:  # /
2098                 fs = [_build_selector_function(s) for s in selector.selector]
2099
2100                 def selector_function(ctx):
2101                     for f in fs:
2102                         picked_formats = list(f(ctx))
2103                         if picked_formats:
2104                             return picked_formats
2105                     return []
2106
2107             elif selector.type == MERGE:  # +
2108                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2109
2110                 def selector_function(ctx):
2111                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2112                         yield _merge(pair)
2113
2114             elif selector.type == SINGLE:  # atom
2115                 format_spec = selector.selector or 'best'
2116
2117                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2118                 if format_spec == 'all':
2119                     def selector_function(ctx):
2120                         yield from _check_formats(ctx['formats'][::-1])
2121                 elif format_spec == 'mergeall':
2122                     def selector_function(ctx):
2123                         formats = list(_check_formats(ctx['formats']))
2124                         if not formats:
2125                             return
2126                         merged_format = formats[-1]
2127                         for f in formats[-2::-1]:
2128                             merged_format = _merge((merged_format, f))
2129                         yield merged_format
2130
2131                 else:
2132                     format_fallback, format_reverse, format_idx = False, True, 1
2133                     mobj = re.match(
2134                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2135                         format_spec)
2136                     if mobj is not None:
2137                         format_idx = int_or_none(mobj.group('n'), default=1)
2138                         format_reverse = mobj.group('bw')[0] == 'b'
2139                         format_type = (mobj.group('type') or [None])[0]
2140                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2141                         format_modified = mobj.group('mod') is not None
2142
2143                         format_fallback = not format_type and not format_modified  # for b, w
2144                         _filter_f = (
2145                             (lambda f: f.get('%scodec' % format_type) != 'none')
2146                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2147                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2148                             if format_type  # bv, ba, wv, wa
2149                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2150                             if not format_modified  # b, w
2151                             else lambda f: True)  # b*, w*
2152                         filter_f = lambda f: _filter_f(f) and (
2153                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2154                     else:
2155                         if format_spec in self._format_selection_exts['audio']:
2156                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2157                         elif format_spec in self._format_selection_exts['video']:
2158                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2159                         elif format_spec in self._format_selection_exts['storyboards']:
2160                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2161                         else:
2162                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2163
2164                     def selector_function(ctx):
2165                         formats = list(ctx['formats'])
2166                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2167                         if format_fallback and ctx['incomplete_formats'] and not matches:
2168                             # for extractors with incomplete formats (audio only (soundcloud)
2169                             # or video only (imgur)) best/worst will fallback to
2170                             # best/worst {video,audio}-only format
2171                             matches = formats
2172                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2173                         try:
2174                             yield matches[format_idx - 1]
2175                         except IndexError:
2176                             return
2177
2178             filters = [self._build_format_filter(f) for f in selector.filters]
2179
2180             def final_selector(ctx):
2181                 ctx_copy = dict(ctx)
2182                 for _filter in filters:
2183                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2184                 return selector_function(ctx_copy)
2185             return final_selector
2186
2187         stream = io.BytesIO(format_spec.encode('utf-8'))
2188         try:
2189             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2190         except tokenize.TokenError:
2191             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2192
2193         class TokenIterator(object):
2194             def __init__(self, tokens):
2195                 self.tokens = tokens
2196                 self.counter = 0
2197
2198             def __iter__(self):
2199                 return self
2200
2201             def __next__(self):
2202                 if self.counter >= len(self.tokens):
2203                     raise StopIteration()
2204                 value = self.tokens[self.counter]
2205                 self.counter += 1
2206                 return value
2207
2208             next = __next__
2209
2210             def restore_last_token(self):
2211                 self.counter -= 1
2212
2213         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2214         return _build_selector_function(parsed_selector)
2215
2216     def _calc_headers(self, info_dict):
2217         res = std_headers.copy()
2218
2219         add_headers = info_dict.get('http_headers')
2220         if add_headers:
2221             res.update(add_headers)
2222
2223         cookies = self._calc_cookies(info_dict)
2224         if cookies:
2225             res['Cookie'] = cookies
2226
2227         if 'X-Forwarded-For' not in res:
2228             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2229             if x_forwarded_for_ip:
2230                 res['X-Forwarded-For'] = x_forwarded_for_ip
2231
2232         return res
2233
2234     def _calc_cookies(self, info_dict):
2235         pr = sanitized_Request(info_dict['url'])
2236         self.cookiejar.add_cookie_header(pr)
2237         return pr.get_header('Cookie')
2238
2239     def _sort_thumbnails(self, thumbnails):
2240         thumbnails.sort(key=lambda t: (
2241             t.get('preference') if t.get('preference') is not None else -1,
2242             t.get('width') if t.get('width') is not None else -1,
2243             t.get('height') if t.get('height') is not None else -1,
2244             t.get('id') if t.get('id') is not None else '',
2245             t.get('url')))
2246
2247     def _sanitize_thumbnails(self, info_dict):
2248         thumbnails = info_dict.get('thumbnails')
2249         if thumbnails is None:
2250             thumbnail = info_dict.get('thumbnail')
2251             if thumbnail:
2252                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2253         if not thumbnails:
2254             return
2255
2256         def check_thumbnails(thumbnails):
2257             for t in thumbnails:
2258                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2259                 try:
2260                     self.urlopen(HEADRequest(t['url']))
2261                 except network_exceptions as err:
2262                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2263                     continue
2264                 yield t
2265
2266         self._sort_thumbnails(thumbnails)
2267         for i, t in enumerate(thumbnails):
2268             if t.get('id') is None:
2269                 t['id'] = '%d' % i
2270             if t.get('width') and t.get('height'):
2271                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2272             t['url'] = sanitize_url(t['url'])
2273
2274         if self.params.get('check_formats') is True:
2275             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2276         else:
2277             info_dict['thumbnails'] = thumbnails
2278
2279     def process_video_result(self, info_dict, download=True):
2280         assert info_dict.get('_type', 'video') == 'video'
2281         self._num_videos += 1
2282
2283         if 'id' not in info_dict:
2284             raise ExtractorError('Missing "id" field in extractor result')
2285         if 'title' not in info_dict:
2286             raise ExtractorError('Missing "title" field in extractor result',
2287                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2288
2289         def report_force_conversion(field, field_not, conversion):
2290             self.report_warning(
2291                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2292                 % (field, field_not, conversion))
2293
2294         def sanitize_string_field(info, string_field):
2295             field = info.get(string_field)
2296             if field is None or isinstance(field, compat_str):
2297                 return
2298             report_force_conversion(string_field, 'a string', 'string')
2299             info[string_field] = compat_str(field)
2300
2301         def sanitize_numeric_fields(info):
2302             for numeric_field in self._NUMERIC_FIELDS:
2303                 field = info.get(numeric_field)
2304                 if field is None or isinstance(field, compat_numeric_types):
2305                     continue
2306                 report_force_conversion(numeric_field, 'numeric', 'int')
2307                 info[numeric_field] = int_or_none(field)
2308
2309         sanitize_string_field(info_dict, 'id')
2310         sanitize_numeric_fields(info_dict)
2311
2312         if 'playlist' not in info_dict:
2313             # It isn't part of a playlist
2314             info_dict['playlist'] = None
2315             info_dict['playlist_index'] = None
2316
2317         self._sanitize_thumbnails(info_dict)
2318
2319         thumbnail = info_dict.get('thumbnail')
2320         thumbnails = info_dict.get('thumbnails')
2321         if thumbnail:
2322             info_dict['thumbnail'] = sanitize_url(thumbnail)
2323         elif thumbnails:
2324             info_dict['thumbnail'] = thumbnails[-1]['url']
2325
2326         if info_dict.get('display_id') is None and 'id' in info_dict:
2327             info_dict['display_id'] = info_dict['id']
2328
2329         if info_dict.get('duration') is not None:
2330             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2331
2332         for ts_key, date_key in (
2333                 ('timestamp', 'upload_date'),
2334                 ('release_timestamp', 'release_date'),
2335                 ('modified_timestamp', 'modified_date'),
2336         ):
2337             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2338                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2339                 # see http://bugs.python.org/issue1646728)
2340                 try:
2341                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2342                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2343                 except (ValueError, OverflowError, OSError):
2344                     pass
2345
2346         live_keys = ('is_live', 'was_live')
2347         live_status = info_dict.get('live_status')
2348         if live_status is None:
2349             for key in live_keys:
2350                 if info_dict.get(key) is False:
2351                     continue
2352                 if info_dict.get(key):
2353                     live_status = key
2354                 break
2355             if all(info_dict.get(key) is False for key in live_keys):
2356                 live_status = 'not_live'
2357         if live_status:
2358             info_dict['live_status'] = live_status
2359             for key in live_keys:
2360                 if info_dict.get(key) is None:
2361                     info_dict[key] = (live_status == key)
2362
2363         # Auto generate title fields corresponding to the *_number fields when missing
2364         # in order to always have clean titles. This is very common for TV series.
2365         for field in ('chapter', 'season', 'episode'):
2366             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2367                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2368
2369         for cc_kind in ('subtitles', 'automatic_captions'):
2370             cc = info_dict.get(cc_kind)
2371             if cc:
2372                 for _, subtitle in cc.items():
2373                     for subtitle_format in subtitle:
2374                         if subtitle_format.get('url'):
2375                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2376                         if subtitle_format.get('ext') is None:
2377                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2378
2379         automatic_captions = info_dict.get('automatic_captions')
2380         subtitles = info_dict.get('subtitles')
2381
2382         info_dict['requested_subtitles'] = self.process_subtitles(
2383             info_dict['id'], subtitles, automatic_captions)
2384
2385         if info_dict.get('formats') is None:
2386             # There's only one format available
2387             formats = [info_dict]
2388         else:
2389             formats = info_dict['formats']
2390
2391         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2392         if not self.params.get('allow_unplayable_formats'):
2393             formats = [f for f in formats if not f.get('has_drm')]
2394
2395         if info_dict.get('is_live'):
2396             get_from_start = bool(self.params.get('live_from_start'))
2397             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2398             if not get_from_start:
2399                 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2400
2401         # backward compatibility
2402         info_dict['fulltitle'] = info_dict['title']
2403
2404         if not formats:
2405             self.raise_no_formats(info_dict)
2406
2407         def is_wellformed(f):
2408             url = f.get('url')
2409             if not url:
2410                 self.report_warning(
2411                     '"url" field is missing or empty - skipping format, '
2412                     'there is an error in extractor')
2413                 return False
2414             if isinstance(url, bytes):
2415                 sanitize_string_field(f, 'url')
2416             return True
2417
2418         # Filter out malformed formats for better extraction robustness
2419         formats = list(filter(is_wellformed, formats))
2420
2421         formats_dict = {}
2422
2423         # We check that all the formats have the format and format_id fields
2424         for i, format in enumerate(formats):
2425             sanitize_string_field(format, 'format_id')
2426             sanitize_numeric_fields(format)
2427             format['url'] = sanitize_url(format['url'])
2428             if not format.get('format_id'):
2429                 format['format_id'] = compat_str(i)
2430             else:
2431                 # Sanitize format_id from characters used in format selector expression
2432                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2433             format_id = format['format_id']
2434             if format_id not in formats_dict:
2435                 formats_dict[format_id] = []
2436             formats_dict[format_id].append(format)
2437
2438         # Make sure all formats have unique format_id
2439         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2440         for format_id, ambiguous_formats in formats_dict.items():
2441             ambigious_id = len(ambiguous_formats) > 1
2442             for i, format in enumerate(ambiguous_formats):
2443                 if ambigious_id:
2444                     format['format_id'] = '%s-%d' % (format_id, i)
2445                 if format.get('ext') is None:
2446                     format['ext'] = determine_ext(format['url']).lower()
2447                 # Ensure there is no conflict between id and ext in format selection
2448                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2449                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2450                     format['format_id'] = 'f%s' % format['format_id']
2451
2452         for i, format in enumerate(formats):
2453             if format.get('format') is None:
2454                 format['format'] = '{id} - {res}{note}'.format(
2455                     id=format['format_id'],
2456                     res=self.format_resolution(format),
2457                     note=format_field(format, 'format_note', ' (%s)'),
2458                 )
2459             if format.get('protocol') is None:
2460                 format['protocol'] = determine_protocol(format)
2461             if format.get('resolution') is None:
2462                 format['resolution'] = self.format_resolution(format, default=None)
2463             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2464                 format['dynamic_range'] = 'SDR'
2465             if (info_dict.get('duration') and format.get('tbr')
2466                     and not format.get('filesize') and not format.get('filesize_approx')):
2467                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2468
2469             # Add HTTP headers, so that external programs can use them from the
2470             # json output
2471             full_format_info = info_dict.copy()
2472             full_format_info.update(format)
2473             format['http_headers'] = self._calc_headers(full_format_info)
2474         # Remove private housekeeping stuff
2475         if '__x_forwarded_for_ip' in info_dict:
2476             del info_dict['__x_forwarded_for_ip']
2477
2478         # TODO Central sorting goes here
2479
2480         if self.params.get('check_formats') is True:
2481             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2482
2483         if not formats or formats[0] is not info_dict:
2484             # only set the 'formats' fields if the original info_dict list them
2485             # otherwise we end up with a circular reference, the first (and unique)
2486             # element in the 'formats' field in info_dict is info_dict itself,
2487             # which can't be exported to json
2488             info_dict['formats'] = formats
2489
2490         info_dict, _ = self.pre_process(info_dict)
2491
2492         # The pre-processors may have modified the formats
2493         formats = info_dict.get('formats', [info_dict])
2494
2495         list_only = self.params.get('simulate') is None and (
2496             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2497         interactive_format_selection = not list_only and self.format_selector == '-'
2498         if self.params.get('list_thumbnails'):
2499             self.list_thumbnails(info_dict)
2500         if self.params.get('listsubtitles'):
2501             if 'automatic_captions' in info_dict:
2502                 self.list_subtitles(
2503                     info_dict['id'], automatic_captions, 'automatic captions')
2504             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2505         if self.params.get('listformats') or interactive_format_selection:
2506             self.list_formats(info_dict)
2507         if list_only:
2508             # Without this printing, -F --print-json will not work
2509             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2510             return
2511
2512         format_selector = self.format_selector
2513         if format_selector is None:
2514             req_format = self._default_format_spec(info_dict, download=download)
2515             self.write_debug('Default format spec: %s' % req_format)
2516             format_selector = self.build_format_selector(req_format)
2517
2518         while True:
2519             if interactive_format_selection:
2520                 req_format = input(
2521                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2522                 try:
2523                     format_selector = self.build_format_selector(req_format)
2524                 except SyntaxError as err:
2525                     self.report_error(err, tb=False, is_error=False)
2526                     continue
2527
2528             # While in format selection we may need to have an access to the original
2529             # format set in order to calculate some metrics or do some processing.
2530             # For now we need to be able to guess whether original formats provided
2531             # by extractor are incomplete or not (i.e. whether extractor provides only
2532             # video-only or audio-only formats) for proper formats selection for
2533             # extractors with such incomplete formats (see
2534             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2535             # Since formats may be filtered during format selection and may not match
2536             # the original formats the results may be incorrect. Thus original formats
2537             # or pre-calculated metrics should be passed to format selection routines
2538             # as well.
2539             # We will pass a context object containing all necessary additional data
2540             # instead of just formats.
2541             # This fixes incorrect format selection issue (see
2542             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2543             incomplete_formats = (
2544                 # All formats are video-only or
2545                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2546                 # all formats are audio-only
2547                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2548
2549             ctx = {
2550                 'formats': formats,
2551                 'incomplete_formats': incomplete_formats,
2552             }
2553
2554             formats_to_download = list(format_selector(ctx))
2555             if interactive_format_selection and not formats_to_download:
2556                 self.report_error('Requested format is not available', tb=False, is_error=False)
2557                 continue
2558             break
2559
2560         if not formats_to_download:
2561             if not self.params.get('ignore_no_formats_error'):
2562                 raise ExtractorError('Requested format is not available', expected=True,
2563                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2564             self.report_warning('Requested format is not available')
2565             # Process what we can, even without any available formats.
2566             formats_to_download = [{}]
2567
2568         best_format = formats_to_download[-1]
2569         if download:
2570             if best_format:
2571                 self.to_screen(
2572                     f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2573                     + ', '.join([f['format_id'] for f in formats_to_download]))
2574             max_downloads_reached = False
2575             for i, fmt in enumerate(formats_to_download):
2576                 formats_to_download[i] = new_info = dict(info_dict)
2577                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2578                 new_info.update(fmt)
2579                 new_info['__original_infodict'] = info_dict
2580                 try:
2581                     self.process_info(new_info)
2582                 except MaxDownloadsReached:
2583                     max_downloads_reached = True
2584                 new_info.pop('__original_infodict')
2585                 # Remove copied info
2586                 for key, val in tuple(new_info.items()):
2587                     if info_dict.get(key) == val:
2588                         new_info.pop(key)
2589                 if max_downloads_reached:
2590                     break
2591
2592             write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2593             assert write_archive.issubset({True, False, 'ignore'})
2594             if True in write_archive and False not in write_archive:
2595                 self.record_download_archive(info_dict)
2596
2597             info_dict['requested_downloads'] = formats_to_download
2598             info_dict = self.run_all_pps('after_video', info_dict)
2599             if max_downloads_reached:
2600                 raise MaxDownloadsReached()
2601
2602         # We update the info dict with the selected best quality format (backwards compatibility)
2603         info_dict.update(best_format)
2604         return info_dict
2605
2606     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2607         """Select the requested subtitles and their format"""
2608         available_subs = {}
2609         if normal_subtitles and self.params.get('writesubtitles'):
2610             available_subs.update(normal_subtitles)
2611         if automatic_captions and self.params.get('writeautomaticsub'):
2612             for lang, cap_info in automatic_captions.items():
2613                 if lang not in available_subs:
2614                     available_subs[lang] = cap_info
2615
2616         if (not self.params.get('writesubtitles') and not
2617                 self.params.get('writeautomaticsub') or not
2618                 available_subs):
2619             return None
2620
2621         all_sub_langs = available_subs.keys()
2622         if self.params.get('allsubtitles', False):
2623             requested_langs = all_sub_langs
2624         elif self.params.get('subtitleslangs', False):
2625             # A list is used so that the order of languages will be the same as
2626             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2627             requested_langs = []
2628             for lang_re in self.params.get('subtitleslangs'):
2629                 if lang_re == 'all':
2630                     requested_langs.extend(all_sub_langs)
2631                     continue
2632                 discard = lang_re[0] == '-'
2633                 if discard:
2634                     lang_re = lang_re[1:]
2635                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2636                 if discard:
2637                     for lang in current_langs:
2638                         while lang in requested_langs:
2639                             requested_langs.remove(lang)
2640                 else:
2641                     requested_langs.extend(current_langs)
2642             requested_langs = orderedSet(requested_langs)
2643         elif 'en' in available_subs:
2644             requested_langs = ['en']
2645         else:
2646             requested_langs = [list(all_sub_langs)[0]]
2647         if requested_langs:
2648             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2649
2650         formats_query = self.params.get('subtitlesformat', 'best')
2651         formats_preference = formats_query.split('/') if formats_query else []
2652         subs = {}
2653         for lang in requested_langs:
2654             formats = available_subs.get(lang)
2655             if formats is None:
2656                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2657                 continue
2658             for ext in formats_preference:
2659                 if ext == 'best':
2660                     f = formats[-1]
2661                     break
2662                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2663                 if matches:
2664                     f = matches[-1]
2665                     break
2666             else:
2667                 f = formats[-1]
2668                 self.report_warning(
2669                     'No subtitle format found matching "%s" for language %s, '
2670                     'using %s' % (formats_query, lang, f['ext']))
2671             subs[lang] = f
2672         return subs
2673
2674     def _forceprint(self, tmpl, info_dict):
2675         mobj = re.match(r'\w+(=?)$', tmpl)
2676         if mobj and mobj.group(1):
2677             tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2678         elif mobj:
2679             tmpl = '%({})s'.format(tmpl)
2680
2681         info_dict = info_dict.copy()
2682         info_dict['formats_table'] = self.render_formats_table(info_dict)
2683         info_dict['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2684         info_dict['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2685         info_dict['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2686         self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2687
2688     def __forced_printings(self, info_dict, filename, incomplete):
2689         def print_mandatory(field, actual_field=None):
2690             if actual_field is None:
2691                 actual_field = field
2692             if (self.params.get('force%s' % field, False)
2693                     and (not incomplete or info_dict.get(actual_field) is not None)):
2694                 self.to_stdout(info_dict[actual_field])
2695
2696         def print_optional(field):
2697             if (self.params.get('force%s' % field, False)
2698                     and info_dict.get(field) is not None):
2699                 self.to_stdout(info_dict[field])
2700
2701         info_dict = info_dict.copy()
2702         if filename is not None:
2703             info_dict['filename'] = filename
2704         if info_dict.get('requested_formats') is not None:
2705             # For RTMP URLs, also include the playpath
2706             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2707         elif 'url' in info_dict:
2708             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2709
2710         if self.params['forceprint'].get('video') or self.params.get('forcejson'):
2711             self.post_extract(info_dict)
2712         for tmpl in self.params['forceprint'].get('video', []):
2713             self._forceprint(tmpl, info_dict)
2714
2715         print_mandatory('title')
2716         print_mandatory('id')
2717         print_mandatory('url', 'urls')
2718         print_optional('thumbnail')
2719         print_optional('description')
2720         print_optional('filename')
2721         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2722             self.to_stdout(formatSeconds(info_dict['duration']))
2723         print_mandatory('format')
2724
2725         if self.params.get('forcejson'):
2726             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2727
2728     def dl(self, name, info, subtitle=False, test=False):
2729         if not info.get('url'):
2730             self.raise_no_formats(info, True)
2731
2732         if test:
2733             verbose = self.params.get('verbose')
2734             params = {
2735                 'test': True,
2736                 'quiet': self.params.get('quiet') or not verbose,
2737                 'verbose': verbose,
2738                 'noprogress': not verbose,
2739                 'nopart': True,
2740                 'skip_unavailable_fragments': False,
2741                 'keep_fragments': False,
2742                 'overwrites': True,
2743                 '_no_ytdl_file': True,
2744             }
2745         else:
2746             params = self.params
2747         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2748         if not test:
2749             for ph in self._progress_hooks:
2750                 fd.add_progress_hook(ph)
2751             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2752             self.write_debug('Invoking downloader on "%s"' % urls)
2753
2754         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2755         # But it may contain objects that are not deep-copyable
2756         new_info = self._copy_infodict(info)
2757         if new_info.get('http_headers') is None:
2758             new_info['http_headers'] = self._calc_headers(new_info)
2759         return fd.download(name, new_info, subtitle)
2760
2761     def process_info(self, info_dict):
2762         """Process a single resolved IE result. (Modified it in-place)"""
2763
2764         assert info_dict.get('_type', 'video') == 'video'
2765         original_infodict = info_dict
2766
2767         if 'format' not in info_dict and 'ext' in info_dict:
2768             info_dict['format'] = info_dict['ext']
2769
2770         if self._match_entry(info_dict) is not None:
2771             info_dict['__write_download_archive'] = 'ignore'
2772             return
2773
2774         self.post_extract(info_dict)
2775         self._num_downloads += 1
2776
2777         # info_dict['_filename'] needs to be set for backward compatibility
2778         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2779         temp_filename = self.prepare_filename(info_dict, 'temp')
2780         files_to_move = {}
2781
2782         # Forced printings
2783         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2784
2785         if self.params.get('simulate'):
2786             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2787             return
2788
2789         if full_filename is None:
2790             return
2791         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2792             return
2793         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2794             return
2795
2796         if self._write_description('video', info_dict,
2797                                    self.prepare_filename(info_dict, 'description')) is None:
2798             return
2799
2800         sub_files = self._write_subtitles(info_dict, temp_filename)
2801         if sub_files is None:
2802             return
2803         files_to_move.update(dict(sub_files))
2804
2805         thumb_files = self._write_thumbnails(
2806             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2807         if thumb_files is None:
2808             return
2809         files_to_move.update(dict(thumb_files))
2810
2811         infofn = self.prepare_filename(info_dict, 'infojson')
2812         _infojson_written = self._write_info_json('video', info_dict, infofn)
2813         if _infojson_written:
2814             info_dict['infojson_filename'] = infofn
2815             # For backward compatibility, even though it was a private field
2816             info_dict['__infojson_filename'] = infofn
2817         elif _infojson_written is None:
2818             return
2819
2820         # Note: Annotations are deprecated
2821         annofn = None
2822         if self.params.get('writeannotations', False):
2823             annofn = self.prepare_filename(info_dict, 'annotation')
2824         if annofn:
2825             if not self._ensure_dir_exists(encodeFilename(annofn)):
2826                 return
2827             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2828                 self.to_screen('[info] Video annotations are already present')
2829             elif not info_dict.get('annotations'):
2830                 self.report_warning('There are no annotations to write.')
2831             else:
2832                 try:
2833                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2834                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2835                         annofile.write(info_dict['annotations'])
2836                 except (KeyError, TypeError):
2837                     self.report_warning('There are no annotations to write.')
2838                 except (OSError, IOError):
2839                     self.report_error('Cannot write annotations file: ' + annofn)
2840                     return
2841
2842         # Write internet shortcut files
2843         def _write_link_file(link_type):
2844             if 'webpage_url' not in info_dict:
2845                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2846                 return False
2847             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2848             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2849                 return False
2850             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2851                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2852                 return True
2853             try:
2854                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2855                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2856                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2857                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2858                     if link_type == 'desktop':
2859                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2860                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2861             except (OSError, IOError):
2862                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2863                 return False
2864             return True
2865
2866         write_links = {
2867             'url': self.params.get('writeurllink'),
2868             'webloc': self.params.get('writewebloclink'),
2869             'desktop': self.params.get('writedesktoplink'),
2870         }
2871         if self.params.get('writelink'):
2872             link_type = ('webloc' if sys.platform == 'darwin'
2873                          else 'desktop' if sys.platform.startswith('linux')
2874                          else 'url')
2875             write_links[link_type] = True
2876
2877         if any(should_write and not _write_link_file(link_type)
2878                for link_type, should_write in write_links.items()):
2879             return
2880
2881         def replace_info_dict(new_info):
2882             nonlocal info_dict
2883             if new_info == info_dict:
2884                 return
2885             info_dict.clear()
2886             info_dict.update(new_info)
2887
2888         try:
2889             new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2890             replace_info_dict(new_info)
2891         except PostProcessingError as err:
2892             self.report_error('Preprocessing: %s' % str(err))
2893             return
2894
2895         if self.params.get('skip_download'):
2896             info_dict['filepath'] = temp_filename
2897             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2898             info_dict['__files_to_move'] = files_to_move
2899             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2900             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2901         else:
2902             # Download
2903             info_dict.setdefault('__postprocessors', [])
2904             try:
2905
2906                 def existing_file(*filepaths):
2907                     ext = info_dict.get('ext')
2908                     final_ext = self.params.get('final_ext', ext)
2909                     existing_files = []
2910                     for file in orderedSet(filepaths):
2911                         if final_ext != ext:
2912                             converted = replace_extension(file, final_ext, ext)
2913                             if os.path.exists(encodeFilename(converted)):
2914                                 existing_files.append(converted)
2915                         if os.path.exists(encodeFilename(file)):
2916                             existing_files.append(file)
2917
2918                     if not existing_files or self.params.get('overwrites', False):
2919                         for file in orderedSet(existing_files):
2920                             self.report_file_delete(file)
2921                             os.remove(encodeFilename(file))
2922                         return None
2923
2924                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2925                     return existing_files[0]
2926
2927                 success = True
2928                 if info_dict.get('requested_formats') is not None:
2929
2930                     def compatible_formats(formats):
2931                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2932                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2933                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2934                         if len(video_formats) > 2 or len(audio_formats) > 2:
2935                             return False
2936
2937                         # Check extension
2938                         exts = set(format.get('ext') for format in formats)
2939                         COMPATIBLE_EXTS = (
2940                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2941                             set(('webm',)),
2942                         )
2943                         for ext_sets in COMPATIBLE_EXTS:
2944                             if ext_sets.issuperset(exts):
2945                                 return True
2946                         # TODO: Check acodec/vcodec
2947                         return False
2948
2949                     requested_formats = info_dict['requested_formats']
2950                     old_ext = info_dict['ext']
2951                     if self.params.get('merge_output_format') is None:
2952                         if not compatible_formats(requested_formats):
2953                             info_dict['ext'] = 'mkv'
2954                             self.report_warning(
2955                                 'Requested formats are incompatible for merge and will be merged into mkv')
2956                         if (info_dict['ext'] == 'webm'
2957                                 and info_dict.get('thumbnails')
2958                                 # check with type instead of pp_key, __name__, or isinstance
2959                                 # since we dont want any custom PPs to trigger this
2960                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2961                             info_dict['ext'] = 'mkv'
2962                             self.report_warning(
2963                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2964                     new_ext = info_dict['ext']
2965
2966                     def correct_ext(filename, ext=new_ext):
2967                         if filename == '-':
2968                             return filename
2969                         filename_real_ext = os.path.splitext(filename)[1][1:]
2970                         filename_wo_ext = (
2971                             os.path.splitext(filename)[0]
2972                             if filename_real_ext in (old_ext, new_ext)
2973                             else filename)
2974                         return '%s.%s' % (filename_wo_ext, ext)
2975
2976                     # Ensure filename always has a correct extension for successful merge
2977                     full_filename = correct_ext(full_filename)
2978                     temp_filename = correct_ext(temp_filename)
2979                     dl_filename = existing_file(full_filename, temp_filename)
2980                     info_dict['__real_download'] = False
2981
2982                     downloaded = []
2983                     merger = FFmpegMergerPP(self)
2984
2985                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
2986                     if dl_filename is not None:
2987                         self.report_file_already_downloaded(dl_filename)
2988                     elif fd:
2989                         for f in requested_formats if fd != FFmpegFD else []:
2990                             f['filepath'] = fname = prepend_extension(
2991                                 correct_ext(temp_filename, info_dict['ext']),
2992                                 'f%s' % f['format_id'], info_dict['ext'])
2993                             downloaded.append(fname)
2994                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2995                         success, real_download = self.dl(temp_filename, info_dict)
2996                         info_dict['__real_download'] = real_download
2997                     else:
2998                         if self.params.get('allow_unplayable_formats'):
2999                             self.report_warning(
3000                                 'You have requested merging of multiple formats '
3001                                 'while also allowing unplayable formats to be downloaded. '
3002                                 'The formats won\'t be merged to prevent data corruption.')
3003                         elif not merger.available:
3004                             self.report_warning(
3005                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
3006                                 'The formats won\'t be merged.')
3007
3008                         if temp_filename == '-':
3009                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3010                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3011                                       else 'but ffmpeg is not installed')
3012                             self.report_warning(
3013                                 f'You have requested downloading multiple formats to stdout {reason}. '
3014                                 'The formats will be streamed one after the other')
3015                             fname = temp_filename
3016                         for f in requested_formats:
3017                             new_info = dict(info_dict)
3018                             del new_info['requested_formats']
3019                             new_info.update(f)
3020                             if temp_filename != '-':
3021                                 fname = prepend_extension(
3022                                     correct_ext(temp_filename, new_info['ext']),
3023                                     'f%s' % f['format_id'], new_info['ext'])
3024                                 if not self._ensure_dir_exists(fname):
3025                                     return
3026                                 f['filepath'] = fname
3027                                 downloaded.append(fname)
3028                             partial_success, real_download = self.dl(fname, new_info)
3029                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3030                             success = success and partial_success
3031
3032                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3033                         info_dict['__postprocessors'].append(merger)
3034                         info_dict['__files_to_merge'] = downloaded
3035                         # Even if there were no downloads, it is being merged only now
3036                         info_dict['__real_download'] = True
3037                     else:
3038                         for file in downloaded:
3039                             files_to_move[file] = None
3040                 else:
3041                     # Just a single file
3042                     dl_filename = existing_file(full_filename, temp_filename)
3043                     if dl_filename is None or dl_filename == temp_filename:
3044                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3045                         # So we should try to resume the download
3046                         success, real_download = self.dl(temp_filename, info_dict)
3047                         info_dict['__real_download'] = real_download
3048                     else:
3049                         self.report_file_already_downloaded(dl_filename)
3050
3051                 dl_filename = dl_filename or temp_filename
3052                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3053
3054             except network_exceptions as err:
3055                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3056                 return
3057             except (OSError, IOError) as err:
3058                 raise UnavailableVideoError(err)
3059             except (ContentTooShortError, ) as err:
3060                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3061                 return
3062
3063             if success and full_filename != '-':
3064
3065                 def fixup():
3066                     do_fixup = True
3067                     fixup_policy = self.params.get('fixup')
3068                     vid = info_dict['id']
3069
3070                     if fixup_policy in ('ignore', 'never'):
3071                         return
3072                     elif fixup_policy == 'warn':
3073                         do_fixup = False
3074                     elif fixup_policy != 'force':
3075                         assert fixup_policy in ('detect_or_warn', None)
3076                         if not info_dict.get('__real_download'):
3077                             do_fixup = False
3078
3079                     def ffmpeg_fixup(cndn, msg, cls):
3080                         if not cndn:
3081                             return
3082                         if not do_fixup:
3083                             self.report_warning(f'{vid}: {msg}')
3084                             return
3085                         pp = cls(self)
3086                         if pp.available:
3087                             info_dict['__postprocessors'].append(pp)
3088                         else:
3089                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3090
3091                     stretched_ratio = info_dict.get('stretched_ratio')
3092                     ffmpeg_fixup(
3093                         stretched_ratio not in (1, None),
3094                         f'Non-uniform pixel ratio {stretched_ratio}',
3095                         FFmpegFixupStretchedPP)
3096
3097                     ffmpeg_fixup(
3098                         (info_dict.get('requested_formats') is None
3099                          and info_dict.get('container') == 'm4a_dash'
3100                          and info_dict.get('ext') == 'm4a'),
3101                         'writing DASH m4a. Only some players support this container',
3102                         FFmpegFixupM4aPP)
3103
3104                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3105                     downloader = downloader.__name__ if downloader else None
3106
3107                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3108                         ffmpeg_fixup(downloader == 'HlsFD',
3109                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3110                                      FFmpegFixupM3u8PP)
3111                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3112                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3113
3114                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3115                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3116
3117                 fixup()
3118                 try:
3119                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3120                 except PostProcessingError as err:
3121                     self.report_error('Postprocessing: %s' % str(err))
3122                     return
3123                 try:
3124                     for ph in self._post_hooks:
3125                         ph(info_dict['filepath'])
3126                 except Exception as err:
3127                     self.report_error('post hooks: %s' % str(err))
3128                     return
3129                 info_dict['__write_download_archive'] = True
3130
3131         if self.params.get('force_write_download_archive'):
3132             info_dict['__write_download_archive'] = True
3133
3134         # Make sure the info_dict was modified in-place
3135         assert info_dict is original_infodict
3136
3137         max_downloads = self.params.get('max_downloads')
3138         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3139             raise MaxDownloadsReached()
3140
3141     def __download_wrapper(self, func):
3142         @functools.wraps(func)
3143         def wrapper(*args, **kwargs):
3144             try:
3145                 res = func(*args, **kwargs)
3146             except UnavailableVideoError as e:
3147                 self.report_error(e)
3148             except MaxDownloadsReached as e:
3149                 self.to_screen(f'[info] {e}')
3150                 raise
3151             except DownloadCancelled as e:
3152                 self.to_screen(f'[info] {e}')
3153                 if not self.params.get('break_per_url'):
3154                     raise
3155             else:
3156                 if self.params.get('dump_single_json', False):
3157                     self.post_extract(res)
3158                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3159         return wrapper
3160
3161     def download(self, url_list):
3162         """Download a given list of URLs."""
3163         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3164         outtmpl = self.outtmpl_dict['default']
3165         if (len(url_list) > 1
3166                 and outtmpl != '-'
3167                 and '%' not in outtmpl
3168                 and self.params.get('max_downloads') != 1):
3169             raise SameFileError(outtmpl)
3170
3171         for url in url_list:
3172             self.__download_wrapper(self.extract_info)(
3173                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3174
3175         return self._download_retcode
3176
3177     def download_with_info_file(self, info_filename):
3178         with contextlib.closing(fileinput.FileInput(
3179                 [info_filename], mode='r',
3180                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3181             # FileInput doesn't have a read method, we can't call json.load
3182             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3183         try:
3184             self.__download_wrapper(self.process_ie_result)(info, download=True)
3185         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3186             if not isinstance(e, EntryNotInPlaylist):
3187                 self.to_stderr('\r')
3188             webpage_url = info.get('webpage_url')
3189             if webpage_url is not None:
3190                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3191                 return self.download([webpage_url])
3192             else:
3193                 raise
3194         return self._download_retcode
3195
3196     @staticmethod
3197     def sanitize_info(info_dict, remove_private_keys=False):
3198         ''' Sanitize the infodict for converting to json '''
3199         if info_dict is None:
3200             return info_dict
3201         info_dict.setdefault('epoch', int(time.time()))
3202         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3203         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3204         if remove_private_keys:
3205             remove_keys |= {
3206                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3207                 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3208             }
3209             reject = lambda k, v: k not in keep_keys and (
3210                 k.startswith('_') or k in remove_keys or v is None)
3211         else:
3212             reject = lambda k, v: k in remove_keys
3213
3214         def filter_fn(obj):
3215             if isinstance(obj, dict):
3216                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3217             elif isinstance(obj, (list, tuple, set, LazyList)):
3218                 return list(map(filter_fn, obj))
3219             elif obj is None or isinstance(obj, (str, int, float, bool)):
3220                 return obj
3221             else:
3222                 return repr(obj)
3223
3224         return filter_fn(info_dict)
3225
3226     @staticmethod
3227     def filter_requested_info(info_dict, actually_filter=True):
3228         ''' Alias of sanitize_info for backward compatibility '''
3229         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3230
3231     @staticmethod
3232     def post_extract(info_dict):
3233         def actual_post_extract(info_dict):
3234             if info_dict.get('_type') in ('playlist', 'multi_video'):
3235                 for video_dict in info_dict.get('entries', {}):
3236                     actual_post_extract(video_dict or {})
3237                 return
3238
3239             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3240             extra = post_extractor().items()
3241             info_dict.update(extra)
3242             info_dict.pop('__post_extractor', None)
3243
3244             original_infodict = info_dict.get('__original_infodict') or {}
3245             original_infodict.update(extra)
3246             original_infodict.pop('__post_extractor', None)
3247
3248         actual_post_extract(info_dict or {})
3249
3250     def run_pp(self, pp, infodict):
3251         files_to_delete = []
3252         if '__files_to_move' not in infodict:
3253             infodict['__files_to_move'] = {}
3254         try:
3255             files_to_delete, infodict = pp.run(infodict)
3256         except PostProcessingError as e:
3257             # Must be True and not 'only_download'
3258             if self.params.get('ignoreerrors') is True:
3259                 self.report_error(e)
3260                 return infodict
3261             raise
3262
3263         if not files_to_delete:
3264             return infodict
3265         if self.params.get('keepvideo', False):
3266             for f in files_to_delete:
3267                 infodict['__files_to_move'].setdefault(f, '')
3268         else:
3269             for old_filename in set(files_to_delete):
3270                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3271                 try:
3272                     os.remove(encodeFilename(old_filename))
3273                 except (IOError, OSError):
3274                     self.report_warning('Unable to remove downloaded original file')
3275                 if old_filename in infodict['__files_to_move']:
3276                     del infodict['__files_to_move'][old_filename]
3277         return infodict
3278
3279     def run_all_pps(self, key, info, *, additional_pps=None):
3280         for tmpl in self.params['forceprint'].get(key, []):
3281             self._forceprint(tmpl, info)
3282         for pp in (additional_pps or []) + self._pps[key]:
3283             info = self.run_pp(info)
3284         return info
3285
3286     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3287         info = dict(ie_info)
3288         info['__files_to_move'] = files_to_move or {}
3289         info = self.run_all_pps(key, info)
3290         return info, info.pop('__files_to_move', None)
3291
3292     def post_process(self, filename, info, files_to_move=None):
3293         """Run all the postprocessors on the given file."""
3294         info['filepath'] = filename
3295         info['__files_to_move'] = files_to_move or {}
3296         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3297         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3298         del info['__files_to_move']
3299         return self.run_all_pps('after_move', info)
3300
3301     def _make_archive_id(self, info_dict):
3302         video_id = info_dict.get('id')
3303         if not video_id:
3304             return
3305         # Future-proof against any change in case
3306         # and backwards compatibility with prior versions
3307         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3308         if extractor is None:
3309             url = str_or_none(info_dict.get('url'))
3310             if not url:
3311                 return
3312             # Try to find matching extractor for the URL and take its ie_key
3313             for ie_key, ie in self._ies.items():
3314                 if ie.suitable(url):
3315                     extractor = ie_key
3316                     break
3317             else:
3318                 return
3319         return '%s %s' % (extractor.lower(), video_id)
3320
3321     def in_download_archive(self, info_dict):
3322         fn = self.params.get('download_archive')
3323         if fn is None:
3324             return False
3325
3326         vid_id = self._make_archive_id(info_dict)
3327         if not vid_id:
3328             return False  # Incomplete video information
3329
3330         return vid_id in self.archive
3331
3332     def record_download_archive(self, info_dict):
3333         fn = self.params.get('download_archive')
3334         if fn is None:
3335             return
3336         vid_id = self._make_archive_id(info_dict)
3337         assert vid_id
3338         self.write_debug(f'Adding to archive: {vid_id}')
3339         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3340             archive_file.write(vid_id + '\n')
3341         self.archive.add(vid_id)
3342
3343     @staticmethod
3344     def format_resolution(format, default='unknown'):
3345         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3346             return 'audio only'
3347         if format.get('resolution') is not None:
3348             return format['resolution']
3349         if format.get('width') and format.get('height'):
3350             return '%dx%d' % (format['width'], format['height'])
3351         elif format.get('height'):
3352             return '%sp' % format['height']
3353         elif format.get('width'):
3354             return '%dx?' % format['width']
3355         return default
3356
3357     def _list_format_headers(self, *headers):
3358         if self.params.get('listformats_table', True) is not False:
3359             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3360         return headers
3361
3362     def _format_note(self, fdict):
3363         res = ''
3364         if fdict.get('ext') in ['f4f', 'f4m']:
3365             res += '(unsupported)'
3366         if fdict.get('language'):
3367             if res:
3368                 res += ' '
3369             res += '[%s]' % fdict['language']
3370         if fdict.get('format_note') is not None:
3371             if res:
3372                 res += ' '
3373             res += fdict['format_note']
3374         if fdict.get('tbr') is not None:
3375             if res:
3376                 res += ', '
3377             res += '%4dk' % fdict['tbr']
3378         if fdict.get('container') is not None:
3379             if res:
3380                 res += ', '
3381             res += '%s container' % fdict['container']
3382         if (fdict.get('vcodec') is not None
3383                 and fdict.get('vcodec') != 'none'):
3384             if res:
3385                 res += ', '
3386             res += fdict['vcodec']
3387             if fdict.get('vbr') is not None:
3388                 res += '@'
3389         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3390             res += 'video@'
3391         if fdict.get('vbr') is not None:
3392             res += '%4dk' % fdict['vbr']
3393         if fdict.get('fps') is not None:
3394             if res:
3395                 res += ', '
3396             res += '%sfps' % fdict['fps']
3397         if fdict.get('acodec') is not None:
3398             if res:
3399                 res += ', '
3400             if fdict['acodec'] == 'none':
3401                 res += 'video only'
3402             else:
3403                 res += '%-5s' % fdict['acodec']
3404         elif fdict.get('abr') is not None:
3405             if res:
3406                 res += ', '
3407             res += 'audio'
3408         if fdict.get('abr') is not None:
3409             res += '@%3dk' % fdict['abr']
3410         if fdict.get('asr') is not None:
3411             res += ' (%5dHz)' % fdict['asr']
3412         if fdict.get('filesize') is not None:
3413             if res:
3414                 res += ', '
3415             res += format_bytes(fdict['filesize'])
3416         elif fdict.get('filesize_approx') is not None:
3417             if res:
3418                 res += ', '
3419             res += '~' + format_bytes(fdict['filesize_approx'])
3420         return res
3421
3422     def render_formats_table(self, info_dict):
3423         if not info_dict.get('formats') and not info_dict.get('url'):
3424             return None
3425
3426         formats = info_dict.get('formats', [info_dict])
3427         if not self.params.get('listformats_table', True) is not False:
3428             table = [
3429                 [
3430                     format_field(f, 'format_id'),
3431                     format_field(f, 'ext'),
3432                     self.format_resolution(f),
3433                     self._format_note(f)
3434                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3435             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3436
3437         delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3438         table = [
3439             [
3440                 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3441                 format_field(f, 'ext'),
3442                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3443                 format_field(f, 'fps', '\t%d'),
3444                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3445                 delim,
3446                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3447                 format_field(f, 'tbr', '\t%dk'),
3448                 shorten_protocol_name(f.get('protocol', '')),
3449                 delim,
3450                 format_field(f, 'vcodec', default='unknown').replace(
3451                     'none', 'images' if f.get('acodec') == 'none'
3452                             else self._format_screen('audio only', self.Styles.SUPPRESS)),
3453                 format_field(f, 'vbr', '\t%dk'),
3454                 format_field(f, 'acodec', default='unknown').replace(
3455                     'none', '' if f.get('vcodec') == 'none'
3456                             else self._format_screen('video only', self.Styles.SUPPRESS)),
3457                 format_field(f, 'abr', '\t%dk'),
3458                 format_field(f, 'asr', '\t%dHz'),
3459                 join_nonempty(
3460                     self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3461                     format_field(f, 'language', '[%s]'),
3462                     join_nonempty(format_field(f, 'format_note'),
3463                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3464                                   delim=', '),
3465                     delim=' '),
3466             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3467         header_line = self._list_format_headers(
3468             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3469             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3470
3471         return render_table(
3472             header_line, table, hide_empty=True,
3473             delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3474
3475     def render_thumbnails_table(self, info_dict):
3476         thumbnails = list(info_dict.get('thumbnails'))
3477         if not thumbnails:
3478             return None
3479         return render_table(
3480             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3481             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3482
3483     def render_subtitles_table(self, video_id, subtitles):
3484         def _row(lang, formats):
3485             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3486             if len(set(names)) == 1:
3487                 names = [] if names[0] == 'unknown' else names[:1]
3488             return [lang, ', '.join(names), ', '.join(exts)]
3489
3490         if not subtitles:
3491             return None
3492         return render_table(
3493             self._list_format_headers('Language', 'Name', 'Formats'),
3494             [_row(lang, formats) for lang, formats in subtitles.items()],
3495             hide_empty=True)
3496
3497     def __list_table(self, video_id, name, func, *args):
3498         table = func(*args)
3499         if not table:
3500             self.to_screen(f'{video_id} has no {name}')
3501             return
3502         self.to_screen(f'[info] Available {name} for {video_id}:')
3503         self.to_stdout(table)
3504
3505     def list_formats(self, info_dict):
3506         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3507
3508     def list_thumbnails(self, info_dict):
3509         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3510
3511     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3512         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3513
3514     def urlopen(self, req):
3515         """ Start an HTTP download """
3516         if isinstance(req, compat_basestring):
3517             req = sanitized_Request(req)
3518         return self._opener.open(req, timeout=self._socket_timeout)
3519
    def print_debug_header(self):
        """Write the diagnostic header shown in verbose mode.

        Does nothing unless the 'verbose' param is set. Otherwise the
        following lines are written as '[debug] ...' messages, to the
        'logger' param if one is set and to the screen otherwise:
        encodings, yt-dlp version and variant, lazy-loader status, plugins,
        compatibility options, git HEAD (for source checkouts), Python
        version, external executable versions, optional libraries and the
        proxy map.
        """
        if not self.params.get('verbose'):
            return

        def get_encoding(stream):
            # Describe the stream's encoding and note when it does not
            # support terminal sequences
            ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
            if not supports_terminal_sequences(stream):
                from .compat import WINDOWS_VT_MODE
                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
            return ret

        encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            get_encoding(self._screen_file), get_encoding(self._err_file),
            self.get_encoding())

        # Pick the sink for all following debug lines. Without a logger, the
        # encodings line itself is written with write_string(encoding=None)
        # rather than through self._write_string
        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        write_debug(join_nonempty(
            'yt-dlp version', __version__,
            f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            delim=' '))
        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if plugin_extractors or plugin_postprocessors:
            # A plugin is listed as 'Class as name' when it is registered
            # under a name different from its class name
            write_debug('Plugins: %s' % [
                '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
        if self.params.get('compat_opts'):
            write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

        # When running from a source checkout, try to report the git commit.
        # This is best-effort: any failure is silently ignored
        if source == 'source':
            try:
                sp = Popen(
                    ['git', 'rev-parse', '--short', 'HEAD'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                    cwd=os.path.dirname(os.path.abspath(__file__)))
                out, err = sp.communicate_or_kill()
                out = out.decode().strip()
                if re.match('[0-9a-f]+', out):
                    write_debug('Git HEAD: %s' % out)
            except Exception:
                try:
                    # NOTE(review): sys.exc_clear only exists on Python 2; on
                    # Python 3 this raises AttributeError, which is swallowed below
                    sys.exc_clear()
                except Exception:
                    pass

        def python_implementation():
            # Implementation name, with the PyPy version appended when available
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        write_debug('Python version %s (%s %s) - %s' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        # Keep only the names of the features whose value is truthy
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        # Executables with a falsy version are left out of the listing
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .downloader.websocket import has_websockets
        from .postprocessor.embedthumbnail import has_mutagen
        from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE

        # Each entry evaluates to the library name when available and to a
        # falsy value otherwise
        lib_str = join_nonempty(
            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
            SECRETSTORAGE_AVAILABLE and 'secretstorage',
            has_mutagen and 'mutagen',
            SQLITE_AVAILABLE and 'sqlite',
            has_websockets and 'websockets',
            delim=', ') or 'none'
        write_debug('Optional libraries: %s' % lib_str)

        # Merge the proxies of every opener handler that exposes them
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        # Not implemented
        # (the literal False below keeps this block from ever running)
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3633
3634     def _setup_opener(self):
3635         timeout_val = self.params.get('socket_timeout')
3636         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3637
3638         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3639         opts_cookiefile = self.params.get('cookiefile')
3640         opts_proxy = self.params.get('proxy')
3641
3642         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3643
3644         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3645         if opts_proxy is not None:
3646             if opts_proxy == '':
3647                 proxies = {}
3648             else:
3649                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3650         else:
3651             proxies = compat_urllib_request.getproxies()
3652             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3653             if 'http' in proxies and 'https' not in proxies:
3654                 proxies['https'] = proxies['http']
3655         proxy_handler = PerRequestProxyHandler(proxies)
3656
3657         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3658         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3659         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3660         redirect_handler = YoutubeDLRedirectHandler()
3661         data_handler = compat_urllib_request_DataHandler()
3662
3663         # When passing our own FileHandler instance, build_opener won't add the
3664         # default FileHandler and allows us to disable the file protocol, which
3665         # can be used for malicious purposes (see
3666         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3667         file_handler = compat_urllib_request.FileHandler()
3668
3669         def file_open(*args, **kwargs):
3670             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3671         file_handler.file_open = file_open
3672
3673         opener = compat_urllib_request.build_opener(
3674             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3675
3676         # Delete the default user-agent header, which would otherwise apply in
3677         # cases where our custom HTTP handler doesn't come into play
3678         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3679         opener.addheaders = []
3680         self._opener = opener
3681
3682     def encode(self, s):
3683         if isinstance(s, bytes):
3684             return s  # Already encoded
3685
3686         try:
3687             return s.encode(self.get_encoding())
3688         except UnicodeEncodeError as err:
3689             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3690             raise
3691
3692     def get_encoding(self):
3693         encoding = self.params.get('encoding')
3694         if encoding is None:
3695             encoding = preferredencoding()
3696         return encoding
3697
3698     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3699         ''' Write infojson and returns True = written, False = skip, None = error '''
3700         if overwrite is None:
3701             overwrite = self.params.get('overwrites', True)
3702         if not self.params.get('writeinfojson'):
3703             return False
3704         elif not infofn:
3705             self.write_debug(f'Skipping writing {label} infojson')
3706             return False
3707         elif not self._ensure_dir_exists(infofn):
3708             return None
3709         elif not overwrite and os.path.exists(infofn):
3710             self.to_screen(f'[info] {label.title()} metadata is already present')
3711         else:
3712             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3713             try:
3714                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3715             except (OSError, IOError):
3716                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3717                 return None
3718         return True
3719
3720     def _write_description(self, label, ie_result, descfn):
3721         ''' Write description and returns True = written, False = skip, None = error '''
3722         if not self.params.get('writedescription'):
3723             return False
3724         elif not descfn:
3725             self.write_debug(f'Skipping writing {label} description')
3726             return False
3727         elif not self._ensure_dir_exists(descfn):
3728             return None
3729         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3730             self.to_screen(f'[info] {label.title()} description is already present')
3731         elif ie_result.get('description') is None:
3732             self.report_warning(f'There\'s no {label} description to write')
3733             return False
3734         else:
3735             try:
3736                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3737                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3738                     descfile.write(ie_result['description'])
3739             except (OSError, IOError):
3740                 self.report_error(f'Cannot write {label} description file {descfn}')
3741                 return None
3742         return True
3743
3744     def _write_subtitles(self, info_dict, filename):
3745         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3746         ret = []
3747         subtitles = info_dict.get('requested_subtitles')
3748         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3749             # subtitles download errors are already managed as troubles in relevant IE
3750             # that way it will silently go on when used with unsupporting IE
3751             return ret
3752
3753         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3754         if not sub_filename_base:
3755             self.to_screen('[info] Skipping writing video subtitles')
3756             return ret
3757         for sub_lang, sub_info in subtitles.items():
3758             sub_format = sub_info['ext']
3759             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3760             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3761             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3762                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3763                 sub_info['filepath'] = sub_filename
3764                 ret.append((sub_filename, sub_filename_final))
3765                 continue
3766
3767             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3768             if sub_info.get('data') is not None:
3769                 try:
3770                     # Use newline='' to prevent conversion of newline characters
3771                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3772                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3773                         subfile.write(sub_info['data'])
3774                     sub_info['filepath'] = sub_filename
3775                     ret.append((sub_filename, sub_filename_final))
3776                     continue
3777                 except (OSError, IOError):
3778                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3779                     return None
3780
3781             try:
3782                 sub_copy = sub_info.copy()
3783                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3784                 self.dl(sub_filename, sub_copy, subtitle=True)
3785                 sub_info['filepath'] = sub_filename
3786                 ret.append((sub_filename, sub_filename_final))
3787             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3788                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3789                     raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
3790                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3791         return ret
3792
3793     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3794         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3795         write_all = self.params.get('write_all_thumbnails', False)
3796         thumbnails, ret = [], []
3797         if write_all or self.params.get('writethumbnail', False):
3798             thumbnails = info_dict.get('thumbnails') or []
3799         multiple = write_all and len(thumbnails) > 1
3800
3801         if thumb_filename_base is None:
3802             thumb_filename_base = filename
3803         if thumbnails and not thumb_filename_base:
3804             self.write_debug(f'Skipping writing {label} thumbnail')
3805             return ret
3806
3807         for idx, t in list(enumerate(thumbnails))[::-1]:
3808             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3809             thumb_display_id = f'{label} thumbnail {t["id"]}'
3810             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3811             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3812
3813             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3814                 ret.append((thumb_filename, thumb_filename_final))
3815                 t['filepath'] = thumb_filename
3816                 self.to_screen('[info] %s is already present' % (
3817                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3818             else:
3819                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3820                 try:
3821                     uf = self.urlopen(t['url'])
3822                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3823                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3824                         shutil.copyfileobj(uf, thumbf)
3825                     ret.append((thumb_filename, thumb_filename_final))
3826                     t['filepath'] = thumb_filename
3827                 except network_exceptions as err:
3828                     thumbnails.pop(idx)
3829                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3830             if ret and not write_all:
3831                 break
3832         return ret