yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_pycrypto_AES,
  40     compat_shlex_quote,
  41     compat_str,
  42     compat_tokenize_tokenize,
  43     compat_urllib_error,
  44     compat_urllib_request,
  45     compat_urllib_request_DataHandler,
  46     windows_enable_vt_mode,
  47 )
  48 from .cookies import load_cookies
  49 from .utils import (
  50     age_restricted,
  51     args_to_str,
  52     ContentTooShortError,
  53     date_from_str,
  54     DateRange,
  55     DEFAULT_OUTTMPL,
  56     determine_ext,
  57     determine_protocol,
  58     DownloadCancelled,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     format_decimal_suffix,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     get_domain,
  74     HEADRequest,
  75     int_or_none,
  76     iri_to_uri,
  77     ISO3166Utils,
  78     join_nonempty,
  79     LazyList,
  80     LINK_TEMPLATES,
  81     locked_file,
  82     make_dir,
  83     make_HTTPS_handler,
  84     MaxDownloadsReached,
  85     network_exceptions,
  86     number_of_digits,
  87     orderedSet,
  88     OUTTMPL_TYPES,
  89     PagedList,
  90     parse_filesize,
  91     PerRequestProxyHandler,
  92     platform_name,
  93     Popen,
  94     POSTPROCESS_WHEN,
  95     PostProcessingError,
  96     preferredencoding,
  97     prepend_extension,
  98     ReExtractInfo,
  99     register_socks_protocols,
 100     RejectedVideoReached,
 101     remove_terminal_sequences,
 102     render_table,
 103     replace_extension,
 104     SameFileError,
 105     sanitize_filename,
 106     sanitize_path,
 107     sanitize_url,
 108     sanitized_Request,
 109     std_headers,
 110     STR_FORMAT_RE_TMPL,
 111     STR_FORMAT_TYPES,
 112     str_or_none,
 113     strftime_or_none,
 114     subtitles_filename,
 115     supports_terminal_sequences,
 116     timetuple_from_msec,
 117     to_high_limit_path,
 118     traverse_obj,
 119     try_get,
 120     UnavailableVideoError,
 121     url_basename,
 122     variadic,
 123     version_tuple,
 124     write_json_file,
 125     write_string,
 126     YoutubeDLCookieProcessor,
 127     YoutubeDLHandler,
 128     YoutubeDLRedirectHandler,
 129 )
 130 from .cache import Cache
 131 from .minicurses import format_text
 132 from .extractor import (
 133     gen_extractor_classes,
 134     get_info_extractor,
 135     _LAZY_LOADER,
 136     _PLUGIN_CLASSES as plugin_extractors
 137 )
 138 from .extractor.openload import PhantomJSwrapper
 139 from .downloader import (
 140     FFmpegFD,
 141     get_suitable_downloader,
 142     shorten_protocol_name
 143 )
 144 from .downloader.rtmp import rtmpdump_version
 145 from .postprocessor import (
 146     get_postprocessor,
 147     EmbedThumbnailPP,
 148     FFmpegFixupDuplicateMoovPP,
 149     FFmpegFixupDurationPP,
 150     FFmpegFixupM3u8PP,
 151     FFmpegFixupM4aPP,
 152     FFmpegFixupStretchedPP,
 153     FFmpegFixupTimestampPP,
 154     FFmpegMergerPP,
 155     FFmpegPostProcessor,
 156     MoveFilesAfterDownloadPP,
 157     _PLUGIN_CLASSES as plugin_postprocessors
 158 )
 159 from .update import detect_variant
 160 from .version import __version__, RELEASE_GIT_HEAD
 161
 162 if compat_os_name == 'nt':
 163     import ctypes
 164
 165
 166 class YoutubeDL(object):
 167     """YoutubeDL class.
 168
 169     YoutubeDL objects are the ones responsible for downloading the
 170     actual video file and writing it to disk if the user has requested
 171     it, among some other tasks. In most cases there should be one per
 172     program. Since the downloader itself doesn't know how to extract the
 173     needed information from a video URL (that is the InfoExtractors' task),
 174     it has to pass the URL to one of them.
 175
 176     For this, YoutubeDL objects have a method that allows
 177     InfoExtractors to be registered in a given order. When it is passed
 178     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 179     finds that reports being able to handle it. The InfoExtractor extracts
 180     all the information about the video or videos the URL refers to, and
 181     YoutubeDL processes the extracted information, possibly using a File
 182     Downloader to download the video.
 183
 184     YoutubeDL objects accept a lot of parameters. In order not to saturate
 185     the object constructor with arguments, it receives a dictionary of
 186     options instead. These options are available through the params
 187     attribute for the InfoExtractors to use. The YoutubeDL also
 188     registers itself as the downloader in charge for the InfoExtractors
 189     that are added to it, so this is a "mutual registration".
 190
 191     Available options:
 192
 193     username:          Username for authentication purposes.
 194     password:          Password for authentication purposes.
 195     videopassword:     Password for accessing a video.
 196     ap_mso:            Adobe Pass multiple-system operator identifier.
 197     ap_username:       Multiple-system operator account username.
 198     ap_password:       Multiple-system operator account password.
 199     usenetrc:          Use netrc for authentication instead.
 200     verbose:           Print additional info to stdout.
 201     quiet:             Do not print messages to stdout.
 202     no_warnings:       Do not print out anything for warnings.
 203     forceprint:        A dict with keys video/playlist mapped to
 204                        a list of templates to force print to stdout
 205                        For compatibility, a single list is also accepted
 206     forceurl:          Force printing final URL. (Deprecated)
 207     forcetitle:        Force printing title. (Deprecated)
 208     forceid:           Force printing ID. (Deprecated)
 209     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 210     forcedescription:  Force printing description. (Deprecated)
 211     forcefilename:     Force printing final filename. (Deprecated)
 212     forceduration:     Force printing duration. (Deprecated)
 213     forcejson:         Force printing info_dict as JSON.
 214     dump_single_json:  Force printing the info_dict of the whole playlist
 215                        (or video) as a single JSON line.
 216     force_write_download_archive: Force writing download archive regardless
 217                        of 'skip_download' or 'simulate'.
 218     simulate:          Do not download the video files. If unset (or None),
 219                        simulate only if listsubtitles, listformats or list_thumbnails is used
 220     format:            Video format code. see "FORMAT SELECTION" for more details.
 221                        You can also pass a function. The function takes 'ctx' as
 222                        argument and returns the formats to download.
 223                        See "build_format_selector" for an implementation
 224     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 225     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 226                        extracting metadata even if the video is not actually
 227                        available for download (experimental)
 228     format_sort:       A list of fields by which to sort the video formats.
 229                        See "Sorting Formats" for more details.
 230     format_sort_force: Force the given format_sort. see "Sorting Formats"
 231                        for more details.
 232     allow_multiple_video_streams:   Allow multiple video streams to be merged
 233                        into a single file
 234     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 235                        into a single file
 236     check_formats:     Whether to test if the formats are downloadable.
 237                        Can be True (check all), False (check none),
 238                        'selected' (check selected formats),
 239                        or None (check only if requested by extractor)
 240     paths:             Dictionary of output paths. The allowed keys are 'home'
 241                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 242     outtmpl:           Dictionary of templates for output names. Allowed keys
 243                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 244                        For compatibility with youtube-dl, a single string can also be used
 245     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 246     restrictfilenames: Do not allow "&" and spaces in file names
 247     trim_file_name:    Limit length of filename (extension excluded)
 248     windowsfilenames:  Force the filenames to be windows compatible
 249     ignoreerrors:      Do not stop on download/postprocessing errors.
 250                        Can be 'only_download' to ignore only download errors.
 251                        Default is 'only_download' for CLI, but False for API
 252     skip_playlist_after_errors: Number of allowed failures until the rest of
 253                        the playlist is skipped
 254     force_generic_extractor: Force downloader to use the generic extractor
 255     overwrites:        Overwrite all video and metadata files if True,
 256                        overwrite only non-video files if None
 257                        and don't overwrite any file if False
 258                        For compatibility with youtube-dl,
 259                        "nooverwrites" may also be used instead
 260     playliststart:     Playlist item to start at.
 261     playlistend:       Playlist item to end at.
 262     playlist_items:    Specific indices of playlist to download.
 263     playlistreverse:   Download playlist items in reverse order.
 264     playlistrandom:    Download playlist items in random order.
 265     matchtitle:        Download only matching titles.
 266     rejecttitle:       Reject downloads for matching titles.
 267     logger:            Log messages to a logging.Logger instance.
 268     logtostderr:       Log messages to stderr instead of stdout.
 269     consoletitle:      Display progress in console window's titlebar.
 270     writedescription:  Write the video description to a .description file
 271     writeinfojson:     Write the video metadata to a .info.json file
 272     clean_infojson:    Remove private fields from the infojson
 273     getcomments:       Extract video comments. This will not be written to disk
 274                        unless writeinfojson is also given
 275     writeannotations:  Write the video annotations to a .annotations.xml file
 276     writethumbnail:    Write the thumbnail image to a file
 277     allow_playlist_files: Whether to write playlists' description, infojson etc
 278                        also to disk when using the 'write*' options
 279     write_all_thumbnails:  Write all thumbnail formats to files
 280     writelink:         Write an internet shortcut file, depending on the
 281                        current platform (.url/.webloc/.desktop)
 282     writeurllink:      Write a Windows internet shortcut file (.url)
 283     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 284     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 285     writesubtitles:    Write the video subtitles to a file
 286     writeautomaticsub: Write the automatically generated subtitles to a file
 287     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 288                        Downloads all the subtitles of the video
 289                        (requires writesubtitles or writeautomaticsub)
 290     listsubtitles:     Lists all available subtitles for the video
 291     subtitlesformat:   The format code for subtitles
 292     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 293                        The list may contain "all" to refer to all the available
 294                        subtitles. The language can be prefixed with a "-" to
 295                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 296     keepvideo:         Keep the video file after post-processing
 297     daterange:         A DateRange object, download only if the upload_date is in the range.
 298     skip_download:     Skip the actual download of the video file
 299     cachedir:          Location of the cache files in the filesystem.
 300                        False to disable filesystem cache.
 301     noplaylist:        Download single video instead of a playlist if in doubt.
 302     age_limit:         An integer representing the user's age in years.
 303                        Unsuitable videos for the given age are skipped.
 304     min_views:         An integer representing the minimum view count the video
 305                        must have in order to not be skipped.
 306                        Videos without view count information are always
 307                        downloaded. None for no limit.
 308     max_views:         An integer representing the maximum view count.
 309                        Videos that are more popular than that are not
 310                        downloaded.
 311                        Videos without view count information are always
 312                        downloaded. None for no limit.
 313     download_archive:  File name of a file where all downloads are recorded.
 314                        Videos already present in the file are not downloaded
 315                        again.
 316     break_on_existing: Stop the download process after attempting to download a
 317                        file that is in the archive.
 318     break_on_reject:   Stop the download process when encountering a video that
 319                        has been filtered out.
 320     break_per_url:     Whether break_on_reject and break_on_existing
 321                        should act on each input URL as opposed to for the entire queue
 322     cookiefile:        File name where cookies should be read from and dumped to
 323     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 324                        name/path from where cookies are loaded, and the name of the
 325                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 326     nocheckcertificate:  Do not verify SSL certificates
 327     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 328                        At the moment, this is only supported by YouTube.
 329     proxy:             URL of the proxy server to use
 330     geo_verification_proxy:  URL of the proxy to use for IP address verification
 331                        on geo-restricted sites.
 332     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 333     bidi_workaround:   Work around buggy terminals without bidirectional text
 334                        support, using bidiv or fribidi
 335     debug_printtraffic:Print out sent and received HTTP traffic
 336     include_ads:       Download ads as well (deprecated)
 337     default_search:    Prepend this string if an input url is not valid.
 338                        'auto' for elaborate guessing
 339     encoding:          Use this encoding instead of the system-specified.
 340     extract_flat:      Do not resolve URLs, return the immediate result.
 341                        Pass in 'in_playlist' to only show this behavior for
 342                        playlist items.
 343     wait_for_video:    If given, wait for scheduled streams to become available.
 344                        The value should be a tuple containing the range
 345                        (min_secs, max_secs) to wait between retries
 346     postprocessors:    A list of dictionaries, each with an entry
 347                        * key:  The name of the postprocessor. See
 348                                yt_dlp/postprocessor/__init__.py for a list.
 349                        * when: When to run the postprocessor. Can be one of
 350                                pre_process|before_dl|post_process|after_move.
 351                                Assumed to be 'post_process' if not given
 352     post_hooks:        Deprecated - Register a custom postprocessor instead
 353                        A list of functions that get called as the final step
 354                        for each video file, after all postprocessors have been
 355                        called. The filename will be passed as the only argument.
 356     progress_hooks:    A list of functions that get called on download
 357                        progress, with a dictionary with the entries
 358                        * status: One of "downloading", "error", or "finished".
 359                                  Check this first and ignore unknown values.
 360                        * info_dict: The extracted info_dict
 361
 362                        If status is one of "downloading", or "finished", the
 363                        following properties may also be present:
 364                        * filename: The final filename (always present)
 365                        * tmpfilename: The filename we're currently writing to
 366                        * downloaded_bytes: Bytes on disk
 367                        * total_bytes: Size of the whole file, None if unknown
 368                        * total_bytes_estimate: Guess of the eventual file size,
 369                                                None if unavailable.
 370                        * elapsed: The number of seconds since download started.
 371                        * eta: The estimated time in seconds, None if unknown
 372                        * speed: The download speed in bytes/second, None if
 373                                 unknown
 374                        * fragment_index: The counter of the currently
 375                                          downloaded video fragment.
 376                        * fragment_count: The number of fragments (= individual
 377                                          files that will be merged)
 378
 379                        Progress hooks are guaranteed to be called at least once
 380                        (with status "finished") if the download is successful.
 381     postprocessor_hooks:  A list of functions that get called on postprocessing
 382                        progress, with a dictionary with the entries
 383                        * status: One of "started", "processing", or "finished".
 384                                  Check this first and ignore unknown values.
 385                        * postprocessor: Name of the postprocessor
 386                        * info_dict: The extracted info_dict
 387
 388                        Postprocessor hooks are guaranteed to be called at least twice
 389                        (with status "started" and "finished") if the processing is successful.
 390     merge_output_format: Extension to use when merging formats.
 391     final_ext:         Expected final extension; used to detect when the file was
 392                        already downloaded and converted
 393     fixup:             Automatically correct known faults of the file.
 394                        One of:
 395                        - "never": do nothing
 396                        - "warn": only emit a warning
 397                        - "detect_or_warn": check whether we can do anything
 398                                            about it, warn otherwise (default)
 399     source_address:    Client-side IP address to bind to.
 400     call_home:         Boolean, true iff we are allowed to contact the
 401                        yt-dlp servers for debugging. (BROKEN)
 402     sleep_interval_requests: Number of seconds to sleep between requests
 403                        during extraction
 404     sleep_interval:    Number of seconds to sleep before each download when
 405                        used alone or a lower bound of a range for randomized
 406                        sleep before each download (minimum possible number
 407                        of seconds to sleep) when used along with
 408                        max_sleep_interval.
 409     max_sleep_interval:Upper bound of a range for randomized sleep before each
 410                        download (maximum possible number of seconds to sleep).
 411                        Must only be used along with sleep_interval.
 412                        Actual sleep time will be a random float from range
 413                        [sleep_interval; max_sleep_interval].
 414     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 415     listformats:       Print an overview of available video formats and exit.
 416     list_thumbnails:   Print a table of all thumbnails and exit.
 417     match_filter:      A function that gets called with the info_dict of
 418                        every video.
 419                        If it returns a message, the video is ignored.
 420                        If it returns None, the video is downloaded.
 421                        match_filter_func in utils.py is one example for this.
 422     no_color:          Do not emit color codes in output.
 423     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 424                        HTTP header
 425     geo_bypass_country:
 426                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 427                        explicit geographic restriction bypassing via faking
 428                        X-Forwarded-For HTTP header
 429     geo_bypass_ip_block:
 430                        IP range in CIDR notation that will be used similarly to
 431                        geo_bypass_country
 432
 433     The following options determine which downloader is picked:
 434     external_downloader: A dictionary of protocol keys and the executable of the
 435                        external downloader to use for it. The allowed protocols
 436                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 437                        Set the value to 'native' to use the native downloader
 438     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 439                        or {'m3u8': 'ffmpeg'} instead.
 440                        Use the native HLS downloader instead of ffmpeg/avconv
 441                        if True, otherwise use ffmpeg/avconv if False, otherwise
 442                        use downloader suggested by extractor if None.
 443     compat_opts:       Compatibility options. See "Differences in default behavior".
 444                        The following options do not work when used through the API:
 445                        filename, abort-on-error, multistreams, no-live-chat, format-sort
 446                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 447                        Refer to __init__.py for their implementation
 448     progress_template: Dictionary of templates for progress outputs.
 449                        Allowed keys are 'download', 'postprocess',
 450                        'download-title' (console title) and 'postprocess-title'.
 451                        The template is mapped on a dictionary with keys 'progress' and 'info'
 452
 453     The following parameters are not used by YoutubeDL itself, they are used by
 454     the downloader (see yt_dlp/downloader/common.py):
 455     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 456     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 457     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 458     external_downloader_args, concurrent_fragment_downloads.
 459
 460     The following options are used by the post processors:
 461     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 462                        otherwise prefer ffmpeg. (avconv support is deprecated)
 463     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 464                        to the binary or its containing directory.
 465     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 466                        and a list of additional command-line arguments for the
 467                        postprocessor/executable. The dict can also have "PP+EXE" keys
 468                        which are used when the given exe is used by the given PP.
 469                        Use 'default' as the name for arguments to be passed to all PP
 470                        For compatibility with youtube-dl, a single list of args
 471                        can also be used
 472
 473     The following options are used by the extractors:
 474     extractor_retries: Number of times to retry for known errors
 475     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 476     hls_split_discontinuity: Split HLS playlists to different formats at
 477                        discontinuities such as ad breaks (default: False)
 478     extractor_args:    A dictionary of arguments to be passed to the extractors.
 479                        See "EXTRACTOR ARGUMENTS" for details.
 480                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 481     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 482                        If True (default), DASH manifests and related
 483                        data will be downloaded and processed by extractor.
 484                        You can reduce network I/O by disabling it if you don't
 485                        care about DASH. (only for youtube)
 486     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 487                        If True (default), HLS manifests and related
 488                        data will be downloaded and processed by extractor.
 489                        You can reduce network I/O by disabling it if you don't
 490                        care about HLS. (only for youtube)
 491     """
 492
 493     _NUMERIC_FIELDS = set((  # field names grouped as numeric; where this set is consumed is not visible in this excerpt
 494         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 495         'timestamp', 'release_timestamp',
 496         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 497         'average_rating', 'comment_count', 'age_limit',
 498         'start_time', 'end_time',
 499         'chapter_number', 'season_number', 'episode_number',
 500         'track_number', 'disc_number', 'release_year',
 501     ))
 502
 503     _format_selection_exts = {  # file extensions grouped by media kind; presumably consulted by format selection - confirm at the point of use
 504         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 505         'video': {'mp4', 'flv', 'webm', '3gp'},
 506         'storyboards': {'mhtml'},
 507     }
 508
 509     params = None  # class-level placeholders start here; __init__ rebinds params, _ies, _pps, _printed_messages, _first_webpage_request, _download_retcode, _num_downloads and _screen_file on each instance
 510     _ies = {}
 511     _pps = {k: [] for k in POSTPROCESS_WHEN}  # one empty list per POSTPROCESS_WHEN key
 512     _printed_messages = set()
 513     _first_webpage_request = True
 514     _download_retcode = None
 515     _num_downloads = None
 516     _playlist_level = 0
 517     _playlist_urls = set()  # NOTE(review): mutable set on the class, not rebound in the visible part of __init__; instances would share it unless it is reset further down - confirm
 518     _screen_file = None
 519
 520     def __init__(self, params=None, auto_init=True):
 521         """Create a YoutubeDL object with the given options.
 522         @param auto_init    Whether to load the default extractors and print header (if verbose).
 523                             Set to 'no_verbose_header' to not print the header
 524         """
 525         if params is None:
 526             params = {}
 527         self._ies = {}
 528         self._ies_instances = {}
 529         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 530         self._printed_messages = set()
 531         self._first_webpage_request = True
 532         self._post_hooks = []
 533         self._progress_hooks = []
 534         self._postprocessor_hooks = []
 535         self._download_retcode = 0
 536         self._num_downloads = 0
 537         self._num_videos = 0
 538         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 539         self._err_file = sys.stderr
 540         self.params = params
 541         self.cache = Cache(self)
 542
 543         windows_enable_vt_mode()
 544         self._allow_colors = {
 545             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 546             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 547         }
 548
 549         if sys.version_info < (3, 6):
 550             self.report_warning(
 551                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 552
 553         if self.params.get('allow_unplayable_formats'):
 554             self.report_warning(
 555                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 556                 'This is a developer option intended for debugging. \n'
 557                 '         If you experience any issues while using this option, '
 558                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 559
 560         def check_deprecated(param, option, suggestion):
 561             if self.params.get(param) is not None:
 562                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 563                 return True
 564             return False
 565
 566         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 567             if self.params.get('geo_verification_proxy') is None:
 568                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 569
 570         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 571         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 572         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 573
 574         for msg in self.params.get('_warnings', []):
 575             self.report_warning(msg)
 576         for msg in self.params.get('_deprecation_warnings', []):
 577             self.deprecation_warning(msg)
 578
 579         if 'list-formats' in self.params.get('compat_opts', []):
 580             self.params['listformats_table'] = False
 581
 582         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 583             # nooverwrites was unnecessarily changed to overwrites
 584             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 585             # This ensures compatibility with both keys
 586             self.params['overwrites'] = not self.params['nooverwrites']
 587         elif self.params.get('overwrites') is None:
 588             self.params.pop('overwrites', None)
 589         else:
 590             self.params['nooverwrites'] = not self.params['overwrites']
 591
 592         # Compatibility with older syntax
 593         params.setdefault('forceprint', {})
 594         if not isinstance(params['forceprint'], dict):
 595             params['forceprint'] = {'video': params['forceprint']}
 596
 597         if params.get('bidi_workaround', False):
 598             try:
 599                 import pty
 600                 master, slave = pty.openpty()
 601                 width = compat_get_terminal_size().columns
 602                 if width is None:
 603                     width_args = []
 604                 else:
 605                     width_args = ['-w', str(width)]
 606                 sp_kwargs = dict(
 607                     stdin=subprocess.PIPE,
 608                     stdout=slave,
 609                     stderr=self._err_file)
 610                 try:
 611                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 612                 except OSError:
 613                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 614                 self._output_channel = os.fdopen(master, 'rb')
 615             except OSError as ose:
 616                 if ose.errno == errno.ENOENT:
 617                     self.report_warning(
 618                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 619                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 620                 else:
 621                     raise
 622
 623         if (sys.platform != 'win32'
 624                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 625                 and not params.get('restrictfilenames', False)):
 626             # Unicode filesystem API will throw errors (#1474, #13027)
 627             self.report_warning(
 628                 'Assuming --restrict-filenames since file system encoding '
 629                 'cannot encode all characters. '
 630                 'Set the LC_ALL environment variable to fix this.')
 631             self.params['restrictfilenames'] = True
 632
 633         self.outtmpl_dict = self.parse_outtmpl()
 634
 635         # Creating format selector here allows us to catch syntax errors before the extraction
 636         self.format_selector = (
 637             self.params.get('format') if self.params.get('format') in (None, '-')
 638             else self.params['format'] if callable(self.params['format'])
 639             else self.build_format_selector(self.params['format']))
 640
 641         self._setup_opener()
 642
 643         if auto_init:
 644             if auto_init != 'no_verbose_header':
 645                 self.print_debug_header()
 646             self.add_default_info_extractors()
 647
 648         hooks = {
 649             'post_hooks': self.add_post_hook,
 650             'progress_hooks': self.add_progress_hook,
 651             'postprocessor_hooks': self.add_postprocessor_hook,
 652         }
 653         for opt, fn in hooks.items():
 654             for ph in self.params.get(opt, []):
 655                 fn(ph)
 656
 657         for pp_def_raw in self.params.get('postprocessors', []):
 658             pp_def = dict(pp_def_raw)
 659             when = pp_def.pop('when', 'post_process')
 660             self.add_post_processor(
 661                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 662                 when=when)
 663
 664         register_socks_protocols()
 665
 666         def preload_download_archive(fn):
 667             """Preload the archive, if any is specified"""
 668             if fn is None:
 669                 return False
 670             self.write_debug(f'Loading archive file {fn!r}')
 671             try:
 672                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 673                     for line in archive_file:
 674                         self.archive.add(line.strip())
 675             except IOError as ioe:
 676                 if ioe.errno != errno.ENOENT:
 677                     raise
 678                 return False
 679             return True
 680
 681         self.archive = set()
 682         preload_download_archive(self.params.get('download_archive'))
 683
 684     def warn_if_short_id(self, argv):
 685         # short YouTube ID starting with dash?
 686         idxs = [
 687             i for i, a in enumerate(argv)
 688             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 689         if idxs:
 690             correct_argv = (
 691                 ['yt-dlp']
 692                 + [a for i, a in enumerate(argv) if i not in idxs]
 693                 + ['--'] + [argv[i] for i in idxs]
 694             )
 695             self.report_warning(
 696                 'Long argument string detected. '
 697                 'Use -- to separate parameters and URLs, like this:\n%s' %
 698                 args_to_str(correct_argv))
 699
 700     def add_info_extractor(self, ie):
 701         """Add an InfoExtractor object to the end of the list."""
 702         ie_key = ie.ie_key()
 703         self._ies[ie_key] = ie
 704         if not isinstance(ie, type):
 705             self._ies_instances[ie_key] = ie
 706             ie.set_downloader(self)
 707
 708     def _get_info_extractor_class(self, ie_key):
 709         ie = self._ies.get(ie_key)
 710         if ie is None:
 711             ie = get_info_extractor(ie_key)
 712             self.add_info_extractor(ie)
 713         return ie
 714
 715     def get_info_extractor(self, ie_key):
 716         """
 717         Get an instance of an IE with name ie_key, it will try to get one from
 718         the _ies list, if there's no instance it will create a new one and add
 719         it to the extractor list.
 720         """
 721         ie = self._ies_instances.get(ie_key)
 722         if ie is None:
 723             ie = get_info_extractor(ie_key)()
 724             self.add_info_extractor(ie)
 725         return ie
 726
 727     def add_default_info_extractors(self):
 728         """
 729         Add the InfoExtractors returned by gen_extractors to the end of the list
 730         """
 731         for ie in gen_extractor_classes():
 732             self.add_info_extractor(ie)
 733
 734     def add_post_processor(self, pp, when='post_process'):
 735         """Add a PostProcessor object to the end of the chain."""
 736         self._pps[when].append(pp)
 737         pp.set_downloader(self)
 738
 739     def add_post_hook(self, ph):
 740         """Add the post hook"""
 741         self._post_hooks.append(ph)
 742
 743     def add_progress_hook(self, ph):
 744         """Add the download progress hook"""
 745         self._progress_hooks.append(ph)
 746
 747     def add_postprocessor_hook(self, ph):
 748         """Add the postprocessing progress hook"""
 749         self._postprocessor_hooks.append(ph)
 750         for pps in self._pps.values():
 751             for pp in pps:
 752                 pp.add_progress_hook(ph)
 753
 754     def _bidi_workaround(self, message):
 755         if not hasattr(self, '_output_channel'):
 756             return message
 757
 758         assert hasattr(self, '_output_process')
 759         assert isinstance(message, compat_str)
 760         line_count = message.count('\n') + 1
 761         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 762         self._output_process.stdin.flush()
 763         res = ''.join(self._output_channel.readline().decode('utf-8')
 764                       for _ in range(line_count))
 765         return res[:-len('\n')]
 766
 767     def _write_string(self, message, out=None, only_once=False):
 768         if only_once:
 769             if message in self._printed_messages:
 770                 return
 771             self._printed_messages.add(message)
 772         write_string(message, out=out, encoding=self.params.get('encoding'))
 773
 774     def to_stdout(self, message, skip_eol=False, quiet=False):
 775         """Print message to stdout"""
 776         if self.params.get('logger'):
 777             self.params['logger'].debug(message)
 778         elif not quiet or self.params.get('verbose'):
 779             self._write_string(
 780                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 781                 self._err_file if quiet else self._screen_file)
 782
 783     def to_stderr(self, message, only_once=False):
 784         """Print message to stderr"""
 785         assert isinstance(message, compat_str)
 786         if self.params.get('logger'):
 787             self.params['logger'].error(message)
 788         else:
 789             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 790
 791     def to_console_title(self, message):
 792         if not self.params.get('consoletitle', False):
 793             return
 794         message = remove_terminal_sequences(message)
 795         if compat_os_name == 'nt':
 796             if ctypes.windll.kernel32.GetConsoleWindow():
 797                 # c_wchar_p() might not be necessary if `message` is
 798                 # already of type unicode()
 799                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 800         elif 'TERM' in os.environ:
 801             self._write_string('\033]0;%s\007' % message, self._screen_file)
 802
 803     def save_console_title(self):
 804         if not self.params.get('consoletitle', False):
 805             return
 806         if self.params.get('simulate'):
 807             return
 808         if compat_os_name != 'nt' and 'TERM' in os.environ:
 809             # Save the title on stack
 810             self._write_string('\033[22;0t', self._screen_file)
 811
 812     def restore_console_title(self):
 813         if not self.params.get('consoletitle', False):
 814             return
 815         if self.params.get('simulate'):
 816             return
 817         if compat_os_name != 'nt' and 'TERM' in os.environ:
 818             # Restore the title from stack
 819             self._write_string('\033[23;0t', self._screen_file)
 820
 821     def __enter__(self):
 822         self.save_console_title()
 823         return self
 824
 825     def __exit__(self, *args):
 826         self.restore_console_title()
 827
 828         if self.params.get('cookiefile') is not None:
 829             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 830
 831     def trouble(self, message=None, tb=None, is_error=True):
 832         """Determine action to take when a download problem appears.
 833
 834         Depending on if the downloader has been configured to ignore
 835         download errors or not, this method may throw an exception or
 836         not when errors are found, after printing the message.
 837
 838         @param tb          If given, is additional traceback information
 839         @param is_error    Whether to raise error according to ignorerrors
 840         """
 841         if message is not None:
 842             self.to_stderr(message)
 843         if self.params.get('verbose'):
 844             if tb is None:
 845                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 846                     tb = ''
 847                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 848                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 849                     tb += encode_compat_str(traceback.format_exc())
 850                 else:
 851                     tb_data = traceback.format_list(traceback.extract_stack())
 852                     tb = ''.join(tb_data)
 853             if tb:
 854                 self.to_stderr(tb)
 855         if not is_error:
 856             return
 857         if not self.params.get('ignoreerrors'):
 858             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 859                 exc_info = sys.exc_info()[1].exc_info
 860             else:
 861                 exc_info = sys.exc_info()
 862             raise DownloadError(message, exc_info)
 863         self._download_retcode = 1
 864
 865     def to_screen(self, message, skip_eol=False):
 866         """Print message to stdout if not in quiet mode"""
 867         self.to_stdout(
 868             message, skip_eol, quiet=self.params.get('quiet', False))
 869
    class Styles(Enum):
        # Named text styles used when formatting console output.
        # Each value is the style string that _format_text unwraps (via .value)
        # and passes on to format_text.
        # NOTE: HEADERS and WARNING share the value 'yellow'; in an Enum a
        # repeated value makes the later name (WARNING) an alias of the earlier
        # member (HEADERS), so the two compare identical.
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 878
 879     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 880         if test_encoding:
 881             original_text = text
 882             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 883             text = text.encode(encoding, 'ignore').decode(encoding)
 884             if fallback is not None and text != original_text:
 885                 text = fallback
 886         if isinstance(f, self.Styles):
 887             f = f.value
 888         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 889
 890     def _format_screen(self, *args, **kwargs):
 891         return self._format_text(
 892             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 893
 894     def _format_err(self, *args, **kwargs):
 895         return self._format_text(
 896             self._err_file, self._allow_colors['err'], *args, **kwargs)
 897
 898     def report_warning(self, message, only_once=False):
 899         '''
 900         Print the message to stderr, it will be prefixed with 'WARNING:'
 901         If stderr is a tty file the 'WARNING:' will be colored
 902         '''
 903         if self.params.get('logger') is not None:
 904             self.params['logger'].warning(message)
 905         else:
 906             if self.params.get('no_warnings'):
 907                 return
 908             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 909
 910     def deprecation_warning(self, message):
 911         if self.params.get('logger') is not None:
 912             self.params['logger'].warning('DeprecationWarning: {message}')
 913         else:
 914             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 915
 916     def report_error(self, message, *args, **kwargs):
 917         '''
 918         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 919         in red if stderr is a tty file.
 920         '''
 921         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 922
 923     def write_debug(self, message, only_once=False):
 924         '''Log debug message or Print message to stderr'''
 925         if not self.params.get('verbose', False):
 926             return
 927         message = '[debug] %s' % message
 928         if self.params.get('logger'):
 929             self.params['logger'].debug(message)
 930         else:
 931             self.to_stderr(message, only_once)
 932
 933     def report_file_already_downloaded(self, file_name):
 934         """Report file has already been fully downloaded."""
 935         try:
 936             self.to_screen('[download] %s has already been downloaded' % file_name)
 937         except UnicodeEncodeError:
 938             self.to_screen('[download] The file has already been downloaded')
 939
 940     def report_file_delete(self, file_name):
 941         """Report that existing file will be deleted."""
 942         try:
 943             self.to_screen('Deleting existing file %s' % file_name)
 944         except UnicodeEncodeError:
 945             self.to_screen('Deleting existing file')
 946
 947     def raise_no_formats(self, info, forced=False):
 948         has_drm = info.get('__has_drm')
 949         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 950         expected = self.params.get('ignore_no_formats_error')
 951         if forced or not expected:
 952             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 953                                  expected=has_drm or expected)
 954         else:
 955             self.report_warning(msg)
 956
 957     def parse_outtmpl(self):
 958         outtmpl_dict = self.params.get('outtmpl', {})
 959         if not isinstance(outtmpl_dict, dict):
 960             outtmpl_dict = {'default': outtmpl_dict}
 961         # Remove spaces in the default template
 962         if self.params.get('restrictfilenames'):
 963             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 964         else:
 965             sanitize = lambda x: x
 966         outtmpl_dict.update({
 967             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 968             if outtmpl_dict.get(k) is None})
 969         for key, val in outtmpl_dict.items():
 970             if isinstance(val, bytes):
 971                 self.report_warning(
 972                     'Parameter outtmpl is bytes, but should be a unicode string. '
 973                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 974         return outtmpl_dict
 975
 976     def get_output_path(self, dir_type='', filename=None):
 977         paths = self.params.get('paths', {})
 978         assert isinstance(paths, dict)
 979         path = os.path.join(
 980             expand_path(paths.get('home', '').strip()),
 981             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 982             filename or '')
 983
 984         # Temporary fix for #4787
 985         # 'Treat' all problem characters by passing filename through preferredencoding
 986         # to workaround encoding issues with subprocess on python2 @ Windows
 987         if sys.version_info < (3, 0) and sys.platform == 'win32':
 988             path = encodeFilename(path, True).decode(preferredencoding())
 989         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 990
 991     @staticmethod
 992     def _outtmpl_expandpath(outtmpl):
 993         # expand_path translates '%%' into '%' and '$$' into '$'
 994         # correspondingly that is not what we want since we need to keep
 995         # '%%' intact for template dict substitution step. Working around
 996         # with boundary-alike separator hack.
 997         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 998         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 999
1000         # outtmpl should be expand_path'ed before template dict substitution
1001         # because meta fields may contain env variables we don't want to
1002         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1003         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1004         return expand_path(outtmpl).replace(sep, '')
1005
1006     @staticmethod
1007     def escape_outtmpl(outtmpl):
1008         ''' Escape any remaining strings like %s, %abc% etc. '''
1009         return re.sub(
1010             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1011             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1012             outtmpl)
1013
1014     @classmethod
1015     def validate_outtmpl(cls, outtmpl):
1016         ''' @return None or Exception object '''
1017         outtmpl = re.sub(
1018             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1019             lambda mobj: f'{mobj.group(0)[:-1]}s',
1020             cls._outtmpl_expandpath(outtmpl))
1021         try:
1022             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1023             return None
1024         except ValueError as err:
1025             return err
1026
1027     @staticmethod
1028     def _copy_infodict(info_dict):
1029         info_dict = dict(info_dict)
1030         for key in ('__original_infodict', '__postprocessors'):
1031             info_dict.pop(key, None)
1032         return info_dict
1033
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param outtmpl     The output template string to process
        @param info_dict   The info dict to read field values from. 'epoch' is set on it
                           if missing; every other addition is made to a copy
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        @return            (outtmpl, TMPL_DICT): the template with every keyed field rewritten
                           to refer to an entry of TMPL_DICT, and the dict holding the
                           already evaluated values for those entries
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        # From here on only a copy (without the internal keys) is modified
        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Maps each rewritten template key to its evaluated value; filled by create_key
        TMPL_DICT = {}
        # Matches the %-format specifiers of the template, including the custom conversion types
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the number pattern below is not escaped, so it
        # matches any character and not just a decimal point - looks unintended; confirm
        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Grammar of the text between the parentheses of a template field:
        # [-]fields[maths][>strf_format][,alternate][&replacement][|default]
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Look up a dotted key path in the (copied) info dict;
            # a leading '.' yields an empty first component, which is dropped
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Evaluate one parsed field expression (the groupdict of an
            # INTERNAL_FORMAT_RE match); returns None when no value results
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Consume the maths string piece by piece, alternating between
                # an operator and its operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is either a number or the name of another field
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder substituted for fields that have no value and no explicit default
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            # Sanitize a value for use in a filename; keys that look like
            # an id field are flagged to sanitize_filename through is_id
            return sanitize_filename(str(value), restricted=restricted,
                                     is_id=re.search(r'(^|[_.])id(\.|$)', key))

        # A callable passed as `sanitize` replaces the default sanitizer;
        # afterwards `sanitize` itself is only a flag
        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            # json.dumps fallback: sets and LazyLists become lists, anything else its repr
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            # Substitution callback for EXTERNAL_FORMAT_RE: evaluates one template
            # field, stores the value in TMPL_DICT and returns the rewritten specifier
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            # Walk the chain of comma separated alternates until one yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            # The whole key text is compared, so only a bare field name gets padded
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            # No value -> default; otherwise the replacement (if given) wins over the value
            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            # Custom conversion types are resolved here and reduced to a standard one
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                # The format is applied to the UTF-8 encoded value; bytes that
                # no longer decode afterwards are ignored
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                # First character of the value; an empty value is formatted as a string instead
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            # NUL characters are woven into the stored key (after each '%' and before the
            # original format) - presumably so the key is unique per format and is left
            # alone by the later escaping step; confirm against escape_outtmpl
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1207
1208     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1209         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1210         return self.escape_outtmpl(outtmpl) % info_dict
1211
1212     def _prepare_filename(self, info_dict, tmpl_type='default'):
1213         try:
1214             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1215             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1216
1217             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1218             if filename and force_ext is not None:
1219                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1220
1221             # https://github.com/blackjack4494/youtube-dlc/issues/85
1222             trim_file_name = self.params.get('trim_file_name', False)
1223             if trim_file_name:
1224                 no_ext, *ext = filename.rsplit('.', 2)
1225                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1226
1227             return filename
1228         except ValueError as err:
1229             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1230             return None
1231
1232     def prepare_filename(self, info_dict, dir_type='', warn=False):
1233         """Generate the output filename."""
1234
1235         filename = self._prepare_filename(info_dict, dir_type or 'default')
1236         if not filename and dir_type not in ('', 'temp'):
1237             return ''
1238
1239         if warn:
1240             if not self.params.get('paths'):
1241                 pass
1242             elif filename == '-':
1243                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1244             elif os.path.isabs(filename):
1245                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1246         if filename == '-' or not filename:
1247             return filename
1248
1249         return self.get_output_path(dir_type, filename)
1250
1251     def _match_entry(self, info_dict, incomplete=False, silent=False):
1252         """ Returns None if the file should be downloaded """
1253
1254         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1255
1256         def check_filter():
1257             if 'title' in info_dict:
1258                 # This can happen when we're just evaluating the playlist
1259                 title = info_dict['title']
1260                 matchtitle = self.params.get('matchtitle', False)
1261                 if matchtitle:
1262                     if not re.search(matchtitle, title, re.IGNORECASE):
1263                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1264                 rejecttitle = self.params.get('rejecttitle', False)
1265                 if rejecttitle:
1266                     if re.search(rejecttitle, title, re.IGNORECASE):
1267                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1268             date = info_dict.get('upload_date')
1269             if date is not None:
1270                 dateRange = self.params.get('daterange', DateRange())
1271                 if date not in dateRange:
1272                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1273             view_count = info_dict.get('view_count')
1274             if view_count is not None:
1275                 min_views = self.params.get('min_views')
1276                 if min_views is not None and view_count < min_views:
1277                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1278                 max_views = self.params.get('max_views')
1279                 if max_views is not None and view_count > max_views:
1280                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1281             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1282                 return 'Skipping "%s" because it is age restricted' % video_title
1283
1284             match_filter = self.params.get('match_filter')
1285             if match_filter is not None:
1286                 try:
1287                     ret = match_filter(info_dict, incomplete=incomplete)
1288                 except TypeError:
1289                     # For backward compatibility
1290                     ret = None if incomplete else match_filter(info_dict)
1291                 if ret is not None:
1292                     return ret
1293             return None
1294
1295         if self.in_download_archive(info_dict):
1296             reason = '%s has already been recorded in the archive' % video_title
1297             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1298         else:
1299             reason = check_filter()
1300             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1301         if reason is not None:
1302             if not silent:
1303                 self.to_screen('[download] ' + reason)
1304             if self.params.get(break_opt, False):
1305                 raise break_err()
1306         return reason
1307
1308     @staticmethod
1309     def add_extra_info(info_dict, extra_info):
1310         '''Set the keys from extra_info in info dict if they are missing'''
1311         for key, value in extra_info.items():
1312             info_dict.setdefault(key, value)
1313
1314     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1315                      process=True, force_generic_extractor=False):
1316         """
1317         Return a list with a dictionary for each video extracted.
1318
1319         Arguments:
1320         url -- URL to extract
1321
1322         Keyword arguments:
1323         download -- whether to download videos during extraction
1324         ie_key -- extractor key hint
1325         extra_info -- dictionary containing the extra values to add to each result
1326         process -- whether to resolve all unresolved references (URLs, playlist items),
1327             must be True for download to work.
1328         force_generic_extractor -- force using the generic extractor
1329         """
1330
1331         if extra_info is None:
1332             extra_info = {}
1333
1334         if not ie_key and force_generic_extractor:
1335             ie_key = 'Generic'
1336
1337         if ie_key:
1338             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1339         else:
1340             ies = self._ies
1341
1342         for ie_key, ie in ies.items():
1343             if not ie.suitable(url):
1344                 continue
1345
1346             if not ie.working():
1347                 self.report_warning('The program functionality for this site has been marked as broken, '
1348                                     'and will probably not work.')
1349
1350             temp_id = ie.get_temp_id(url)
1351             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1352                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1353                 if self.params.get('break_on_existing', False):
1354                     raise ExistingVideoReached()
1355                 break
1356             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1357         else:
1358             self.report_error('no suitable InfoExtractor for URL %s' % url)
1359
1360     def __handle_extraction_exceptions(func):
1361         @functools.wraps(func)
1362         def wrapper(self, *args, **kwargs):
1363             while True:
1364                 try:
1365                     return func(self, *args, **kwargs)
1366                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1367                     raise
1368                 except ReExtractInfo as e:
1369                     if e.expected:
1370                         self.to_screen(f'{e}; Re-extracting data')
1371                     else:
1372                         self.to_stderr('\r')
1373                         self.report_warning(f'{e}; Re-extracting data')
1374                     continue
1375                 except GeoRestrictedError as e:
1376                     msg = e.msg
1377                     if e.countries:
1378                         msg += '\nThis video is available in %s.' % ', '.join(
1379                             map(ISO3166Utils.short2full, e.countries))
1380                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1381                     self.report_error(msg)
1382                 except ExtractorError as e:  # An error we somewhat expected
1383                     self.report_error(str(e), e.format_traceback())
1384                 except Exception as e:
1385                     if self.params.get('ignoreerrors'):
1386                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1387                     else:
1388                         raise
1389                 break
1390         return wrapper
1391
1392     def _wait_for_video(self, ie_result):
1393         if (not self.params.get('wait_for_video')
1394                 or ie_result.get('_type', 'video') != 'video'
1395                 or ie_result.get('formats') or ie_result.get('url')):
1396             return
1397
1398         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1399         last_msg = ''
1400
1401         def progress(msg):
1402             nonlocal last_msg
1403             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1404             last_msg = msg
1405
1406         min_wait, max_wait = self.params.get('wait_for_video')
1407         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1408         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1409             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1410             self.report_warning('Release time of video is not known')
1411         elif (diff or 0) <= 0:
1412             self.report_warning('Video should already be available according to extracted info')
1413         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1414         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1415
1416         wait_till = time.time() + diff
1417         try:
1418             while True:
1419                 diff = wait_till - time.time()
1420                 if diff <= 0:
1421                     progress('')
1422                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1423                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1424                 time.sleep(1)
1425         except KeyboardInterrupt:
1426             progress('')
1427             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1428         except BaseException as e:
1429             if not isinstance(e, ReExtractInfo):
1430                 self.to_screen('')
1431             raise
1432
1433     @__handle_extraction_exceptions
1434     def __extract_info(self, url, ie, download, extra_info, process):
1435         ie_result = ie.extract(url)
1436         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1437             return
1438         if isinstance(ie_result, list):
1439             # Backwards compatibility: old IE result format
1440             ie_result = {
1441                 '_type': 'compat_list',
1442                 'entries': ie_result,
1443             }
1444         if extra_info.get('original_url'):
1445             ie_result.setdefault('original_url', extra_info['original_url'])
1446         self.add_default_extra_info(ie_result, ie, url)
1447         if process:
1448             self._wait_for_video(ie_result)
1449             return self.process_ie_result(ie_result, download, extra_info)
1450         else:
1451             return ie_result
1452
1453     def add_default_extra_info(self, ie_result, ie, url):
1454         if url is not None:
1455             self.add_extra_info(ie_result, {
1456                 'webpage_url': url,
1457                 'original_url': url,
1458                 'webpage_url_basename': url_basename(url),
1459                 'webpage_url_domain': get_domain(url),
1460             })
1461         if ie is not None:
1462             self.add_extra_info(ie_result, {
1463                 'extractor': ie.IE_NAME,
1464                 'extractor_key': ie.ie_key(),
1465             })
1466
1467     def process_ie_result(self, ie_result, download=True, extra_info=None):
1468         """
1469         Take the result of the ie(may be modified) and resolve all unresolved
1470         references (URLs, playlist items).
1471
1472         It will also download the videos if 'download'.
1473         Returns the resolved ie_result.
1474         """
1475         if extra_info is None:
1476             extra_info = {}
1477         result_type = ie_result.get('_type', 'video')
1478
1479         if result_type in ('url', 'url_transparent'):
1480             ie_result['url'] = sanitize_url(ie_result['url'])
1481             if ie_result.get('original_url'):
1482                 extra_info.setdefault('original_url', ie_result['original_url'])
1483
1484             extract_flat = self.params.get('extract_flat', False)
1485             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1486                     or extract_flat is True):
1487                 info_copy = ie_result.copy()
1488                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1489                 if ie and not ie_result.get('id'):
1490                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1491                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1492                 self.add_extra_info(info_copy, extra_info)
1493                 info_copy, _ = self.pre_process(info_copy)
1494                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1495                 if self.params.get('force_write_download_archive', False):
1496                     self.record_download_archive(info_copy)
1497                 return ie_result
1498
1499         if result_type == 'video':
1500             self.add_extra_info(ie_result, extra_info)
1501             ie_result = self.process_video_result(ie_result, download=download)
1502             additional_urls = (ie_result or {}).get('additional_urls')
1503             if additional_urls:
1504                 # TODO: Improve MetadataParserPP to allow setting a list
1505                 if isinstance(additional_urls, compat_str):
1506                     additional_urls = [additional_urls]
1507                 self.to_screen(
1508                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1509                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1510                 ie_result['additional_entries'] = [
1511                     self.extract_info(
1512                         url, download, extra_info=extra_info,
1513                         force_generic_extractor=self.params.get('force_generic_extractor'))
1514                     for url in additional_urls
1515                 ]
1516             return ie_result
1517         elif result_type == 'url':
1518             # We have to add extra_info to the results because it may be
1519             # contained in a playlist
1520             return self.extract_info(
1521                 ie_result['url'], download,
1522                 ie_key=ie_result.get('ie_key'),
1523                 extra_info=extra_info)
1524         elif result_type == 'url_transparent':
1525             # Use the information from the embedding page
1526             info = self.extract_info(
1527                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1528                 extra_info=extra_info, download=False, process=False)
1529
1530             # extract_info may return None when ignoreerrors is enabled and
1531             # extraction failed with an error, don't crash and return early
1532             # in this case
1533             if not info:
1534                 return info
1535
1536             force_properties = dict(
1537                 (k, v) for k, v in ie_result.items() if v is not None)
1538             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1539                 if f in force_properties:
1540                     del force_properties[f]
1541             new_result = info.copy()
1542             new_result.update(force_properties)
1543
1544             # Extracted info may not be a video result (i.e.
1545             # info.get('_type', 'video') != video) but rather an url or
1546             # url_transparent. In such cases outer metadata (from ie_result)
1547             # should be propagated to inner one (info). For this to happen
1548             # _type of info should be overridden with url_transparent. This
1549             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1550             if new_result.get('_type') == 'url':
1551                 new_result['_type'] = 'url_transparent'
1552
1553             return self.process_ie_result(
1554                 new_result, download=download, extra_info=extra_info)
1555         elif result_type in ('playlist', 'multi_video'):
1556             # Protect from infinite recursion due to recursively nested playlists
1557             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1558             webpage_url = ie_result['webpage_url']
1559             if webpage_url in self._playlist_urls:
1560                 self.to_screen(
1561                     '[download] Skipping already downloaded playlist: %s'
1562                     % ie_result.get('title') or ie_result.get('id'))
1563                 return
1564
1565             self._playlist_level += 1
1566             self._playlist_urls.add(webpage_url)
1567             self._sanitize_thumbnails(ie_result)
1568             try:
1569                 return self.__process_playlist(ie_result, download)
1570             finally:
1571                 self._playlist_level -= 1
1572                 if not self._playlist_level:
1573                     self._playlist_urls.clear()
1574         elif result_type == 'compat_list':
1575             self.report_warning(
1576                 'Extractor %s returned a compat_list result. '
1577                 'It needs to be updated.' % ie_result.get('extractor'))
1578
1579             def _fixup(r):
1580                 self.add_extra_info(r, {
1581                     'extractor': ie_result['extractor'],
1582                     'webpage_url': ie_result['webpage_url'],
1583                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1584                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1585                     'extractor_key': ie_result['extractor_key'],
1586                 })
1587                 return r
1588             ie_result['entries'] = [
1589                 self.process_ie_result(_fixup(r), download, extra_info)
1590                 for r in ie_result['entries']
1591             ]
1592             return ie_result
1593         else:
1594             raise Exception('Invalid result type: %s' % result_type)
1595
1596     def _ensure_dir_exists(self, path):
1597         return make_dir(path, self.report_error)
1598
1599     @staticmethod
1600     def _playlist_infodict(ie_result, **kwargs):
1601         return {
1602             **ie_result,
1603             'playlist': ie_result.get('title') or ie_result.get('id'),
1604             'playlist_id': ie_result.get('id'),
1605             'playlist_title': ie_result.get('title'),
1606             'playlist_uploader': ie_result.get('uploader'),
1607             'playlist_uploader_id': ie_result.get('uploader_id'),
1608             'playlist_index': 0,
1609             **kwargs,
1610         }
1611
    def __process_playlist(self, ie_result, download):
        """
        Select, filter and process the entries of a playlist result.

        The entries are chosen according to the 'playlist_items' param or,
        when that is unset, the 'playliststart'/'playlistend' params. Each
        chosen entry that passes _match_entry is processed through
        process_ie_result, and ie_result['entries'] is replaced by the list of
        processed results.

        Returns the playlist result after the 'playlist' postprocessors ran,
        or None if writing one of the playlist files returned None.
        Raises EntryNotInPlaylist if ie_result has no 'entries', or if an
        entry is missing from a result that has 'requested_entries'.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Sentinel marking the positions for which no entry is available
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            # 'requested_entries' gives the 1-based position of each available
            # entry; rebuild a list indexable by position, with MissingEntry
            # placeholders at all the other positions
            def fill_missing_entries(entries, indices):
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            # Expand a comma-separated spec of indices and inclusive ranges,
            # e.g. '1-3,7' yields 1, 2, 3, 7
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            playlist_count = len(ie_entries)
            # The remaining %d placeholder is filled in with n_entries further below
            msg = f'Collected {playlist_count} videos; downloading %d of them'
            ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count

            def get_entry(i):
                return ie_entries[i - 1]
        else:
            msg = 'Downloading %d videos'
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)

            # Indexing a non-list container goes through
            # __handle_extraction_exceptions, so that errors raised while
            # fetching an entry are handled the same way as extraction errors
            def get_entry(i):
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        entries, broken = [], False
        # Without explicit items, count upwards from playliststart until the
        # end of the playlist (or playlistend) is reached
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            # Positions are 1-based; 0 would be looked up as ie_entries[-1]
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    break
            # With explicit playlist items, an unavailable entry is kept as
            # None so that positions in `entries` stay aligned with playlistitems
            entries.append(entry)
            try:
                if entry is not None:
                    # The return value is ignored; this call only serves to
                    # stop collecting entries when a break_on_* option triggers
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                broken = True
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        # Only use the number of collected entries as the playlist count when
        # nothing restricted or cut short the collection
        if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
            ie_result['playlist_count'] = n_entries

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
            ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            # A None result from the writers aborts processing of the playlist
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        # With no limit configured, the failure count can never reach max_failures
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            # The 'playlist-index' compat option derives the index from the
            # position in the (possibly re-ordered) list instead of the saved one
            if 'playlist-index' in self.params.get('compat_opts', []):
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # Playlist-level values handed to the entry as extra_info
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_count': ie_result.get('playlist_count'),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Entries rejected by the filters are left out of the results
            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # A falsy result is counted as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {playlist}')
        return ie_result
1781
1782     @__handle_extraction_exceptions
1783     def __process_iterable_entry(self, entry, download, extra_info):
1784         return self.process_ie_result(
1785             entry, download=download, extra_info=extra_info)
1786
1787     def _build_format_filter(self, filter_spec):
1788         " Returns a function to filter the formats according to the filter_spec "
1789
1790         OPERATORS = {
1791             '<': operator.lt,
1792             '<=': operator.le,
1793             '>': operator.gt,
1794             '>=': operator.ge,
1795             '=': operator.eq,
1796             '!=': operator.ne,
1797         }
1798         operator_rex = re.compile(r'''(?x)\s*
1799             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1800             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1801             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1802             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1803         m = operator_rex.fullmatch(filter_spec)
1804         if m:
1805             try:
1806                 comparison_value = int(m.group('value'))
1807             except ValueError:
1808                 comparison_value = parse_filesize(m.group('value'))
1809                 if comparison_value is None:
1810                     comparison_value = parse_filesize(m.group('value') + 'B')
1811                 if comparison_value is None:
1812                     raise ValueError(
1813                         'Invalid value %r in format specification %r' % (
1814                             m.group('value'), filter_spec))
1815             op = OPERATORS[m.group('op')]
1816
1817         if not m:
1818             STR_OPERATORS = {
1819                 '=': operator.eq,
1820                 '^=': lambda attr, value: attr.startswith(value),
1821                 '$=': lambda attr, value: attr.endswith(value),
1822                 '*=': lambda attr, value: value in attr,
1823             }
1824             str_operator_rex = re.compile(r'''(?x)\s*
1825                 (?P<key>[a-zA-Z0-9._-]+)\s*
1826                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1827                 (?P<value>[a-zA-Z0-9._-]+)\s*
1828                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1829             m = str_operator_rex.fullmatch(filter_spec)
1830             if m:
1831                 comparison_value = m.group('value')
1832                 str_op = STR_OPERATORS[m.group('op')]
1833                 if m.group('negation'):
1834                     op = lambda attr, value: not str_op(attr, value)
1835                 else:
1836                     op = str_op
1837
1838         if not m:
1839             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1840
1841         def _filter(f):
1842             actual_value = f.get(m.group('key'))
1843             if actual_value is None:
1844                 return m.group('none_inclusive')
1845             return op(actual_value, comparison_value)
1846         return _filter
1847
1848     def _check_formats(self, formats):
1849         for f in formats:
1850             self.to_screen('[info] Testing format %s' % f['format_id'])
1851             path = self.get_output_path('temp')
1852             if not self._ensure_dir_exists(f'{path}/'):
1853                 continue
1854             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1855             temp_file.close()
1856             try:
1857                 success, _ = self.dl(temp_file.name, f, test=True)
1858             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1859                 success = False
1860             finally:
1861                 if os.path.exists(temp_file.name):
1862                     try:
1863                         os.remove(temp_file.name)
1864                     except OSError:
1865                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1866             if success:
1867                 yield f
1868             else:
1869                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1870
1871     def _default_format_spec(self, info_dict, download=True):
1872
1873         def can_merge():
1874             merger = FFmpegMergerPP(self)
1875             return merger.available and merger.can_merge()
1876
1877         prefer_best = (
1878             not self.params.get('simulate')
1879             and download
1880             and (
1881                 not can_merge()
1882                 or info_dict.get('is_live', False)
1883                 or self.outtmpl_dict['default'] == '-'))
1884         compat = (
1885             prefer_best
1886             or self.params.get('allow_multiple_audio_streams', False)
1887             or 'format-spec' in self.params.get('compat_opts', []))
1888
1889         return (
1890             'best/bestvideo+bestaudio' if prefer_best
1891             else 'bestvideo*+bestaudio/best' if not compat
1892             else 'bestvideo+bestaudio/best')
1893
1894     def build_format_selector(self, format_spec):
1895         def syntax_error(note, start):
1896             message = (
1897                 'Invalid format specification: '
1898                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1899             return SyntaxError(message)
1900
1901         PICKFIRST = 'PICKFIRST'
1902         MERGE = 'MERGE'
1903         SINGLE = 'SINGLE'
1904         GROUP = 'GROUP'
1905         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1906
1907         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1908                                   'video': self.params.get('allow_multiple_video_streams', False)}
1909
1910         check_formats = self.params.get('check_formats') == 'selected'
1911
1912         def _parse_filter(tokens):
1913             filter_parts = []
1914             for type, string, start, _, _ in tokens:
1915                 if type == tokenize.OP and string == ']':
1916                     return ''.join(filter_parts)
1917                 else:
1918                     filter_parts.append(string)
1919
1920         def _remove_unused_ops(tokens):
1921             # Remove operators that we don't use and join them with the surrounding strings
1922             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1923             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1924             last_string, last_start, last_end, last_line = None, None, None, None
1925             for type, string, start, end, line in tokens:
1926                 if type == tokenize.OP and string == '[':
1927                     if last_string:
1928                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1929                         last_string = None
1930                     yield type, string, start, end, line
1931                     # everything inside brackets will be handled by _parse_filter
1932                     for type, string, start, end, line in tokens:
1933                         yield type, string, start, end, line
1934                         if type == tokenize.OP and string == ']':
1935                             break
1936                 elif type == tokenize.OP and string in ALLOWED_OPS:
1937                     if last_string:
1938                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1939                         last_string = None
1940                     yield type, string, start, end, line
1941                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1942                     if not last_string:
1943                         last_string = string
1944                         last_start = start
1945                         last_end = end
1946                     else:
1947                         last_string += string
1948             if last_string:
1949                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1950
1951         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1952             selectors = []
1953             current_selector = None
1954             for type, string, start, _, _ in tokens:
1955                 # ENCODING is only defined in python 3.x
1956                 if type == getattr(tokenize, 'ENCODING', None):
1957                     continue
1958                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1959                     current_selector = FormatSelector(SINGLE, string, [])
1960                 elif type == tokenize.OP:
1961                     if string == ')':
1962                         if not inside_group:
1963                             # ')' will be handled by the parentheses group
1964                             tokens.restore_last_token()
1965                         break
1966                     elif inside_merge and string in ['/', ',']:
1967                         tokens.restore_last_token()
1968                         break
1969                     elif inside_choice and string == ',':
1970                         tokens.restore_last_token()
1971                         break
1972                     elif string == ',':
1973                         if not current_selector:
1974                             raise syntax_error('"," must follow a format selector', start)
1975                         selectors.append(current_selector)
1976                         current_selector = None
1977                     elif string == '/':
1978                         if not current_selector:
1979                             raise syntax_error('"/" must follow a format selector', start)
1980                         first_choice = current_selector
1981                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1982                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1983                     elif string == '[':
1984                         if not current_selector:
1985                             current_selector = FormatSelector(SINGLE, 'best', [])
1986                         format_filter = _parse_filter(tokens)
1987                         current_selector.filters.append(format_filter)
1988                     elif string == '(':
1989                         if current_selector:
1990                             raise syntax_error('Unexpected "("', start)
1991                         group = _parse_format_selection(tokens, inside_group=True)
1992                         current_selector = FormatSelector(GROUP, group, [])
1993                     elif string == '+':
1994                         if not current_selector:
1995                             raise syntax_error('Unexpected "+"', start)
1996                         selector_1 = current_selector
1997                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1998                         if not selector_2:
1999                             raise syntax_error('Expected a selector', start)
2000                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2001                     else:
2002                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2003                 elif type == tokenize.ENDMARKER:
2004                     break
2005             if current_selector:
2006                 selectors.append(current_selector)
2007             return selectors
2008
2009         def _merge(formats_pair):
2010             format_1, format_2 = formats_pair
2011
2012             formats_info = []
2013             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2014             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2015
2016             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2017                 get_no_more = {'video': False, 'audio': False}
2018                 for (i, fmt_info) in enumerate(formats_info):
2019                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2020                         formats_info.pop(i)
2021                         continue
2022                     for aud_vid in ['audio', 'video']:
2023                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2024                             if get_no_more[aud_vid]:
2025                                 formats_info.pop(i)
2026                                 break
2027                             get_no_more[aud_vid] = True
2028
2029             if len(formats_info) == 1:
2030                 return formats_info[0]
2031
2032             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2033             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2034
2035             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2036             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2037
2038             output_ext = self.params.get('merge_output_format')
2039             if not output_ext:
2040                 if the_only_video:
2041                     output_ext = the_only_video['ext']
2042                 elif the_only_audio and not video_fmts:
2043                     output_ext = the_only_audio['ext']
2044                 else:
2045                     output_ext = 'mkv'
2046
2047             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2048
2049             new_dict = {
2050                 'requested_formats': formats_info,
2051                 'format': '+'.join(filtered('format')),
2052                 'format_id': '+'.join(filtered('format_id')),
2053                 'ext': output_ext,
2054                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2055                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2056                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2057                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2058                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2059             }
2060
2061             if the_only_video:
2062                 new_dict.update({
2063                     'width': the_only_video.get('width'),
2064                     'height': the_only_video.get('height'),
2065                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2066                     'fps': the_only_video.get('fps'),
2067                     'dynamic_range': the_only_video.get('dynamic_range'),
2068                     'vcodec': the_only_video.get('vcodec'),
2069                     'vbr': the_only_video.get('vbr'),
2070                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2071                 })
2072
2073             if the_only_audio:
2074                 new_dict.update({
2075                     'acodec': the_only_audio.get('acodec'),
2076                     'abr': the_only_audio.get('abr'),
2077                     'asr': the_only_audio.get('asr'),
2078                 })
2079
2080             return new_dict
2081
2082         def _check_formats(formats):
2083             if not check_formats:
2084                 yield from formats
2085                 return
2086             yield from self._check_formats(formats)
2087
2088         def _build_selector_function(selector):
2089             if isinstance(selector, list):  # ,
2090                 fs = [_build_selector_function(s) for s in selector]
2091
2092                 def selector_function(ctx):
2093                     for f in fs:
2094                         yield from f(ctx)
2095                 return selector_function
2096
2097             elif selector.type == GROUP:  # ()
2098                 selector_function = _build_selector_function(selector.selector)
2099
2100             elif selector.type == PICKFIRST:  # /
2101                 fs = [_build_selector_function(s) for s in selector.selector]
2102
2103                 def selector_function(ctx):
2104                     for f in fs:
2105                         picked_formats = list(f(ctx))
2106                         if picked_formats:
2107                             return picked_formats
2108                     return []
2109
2110             elif selector.type == MERGE:  # +
2111                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2112
2113                 def selector_function(ctx):
2114                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2115                         yield _merge(pair)
2116
2117             elif selector.type == SINGLE:  # atom
2118                 format_spec = selector.selector or 'best'
2119
2120                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2121                 if format_spec == 'all':
2122                     def selector_function(ctx):
2123                         yield from _check_formats(ctx['formats'][::-1])
2124                 elif format_spec == 'mergeall':
2125                     def selector_function(ctx):
2126                         formats = list(_check_formats(ctx['formats']))
2127                         if not formats:
2128                             return
2129                         merged_format = formats[-1]
2130                         for f in formats[-2::-1]:
2131                             merged_format = _merge((merged_format, f))
2132                         yield merged_format
2133
2134                 else:
2135                     format_fallback, format_reverse, format_idx = False, True, 1
2136                     mobj = re.match(
2137                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2138                         format_spec)
2139                     if mobj is not None:
2140                         format_idx = int_or_none(mobj.group('n'), default=1)
2141                         format_reverse = mobj.group('bw')[0] == 'b'
2142                         format_type = (mobj.group('type') or [None])[0]
2143                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2144                         format_modified = mobj.group('mod') is not None
2145
2146                         format_fallback = not format_type and not format_modified  # for b, w
2147                         _filter_f = (
2148                             (lambda f: f.get('%scodec' % format_type) != 'none')
2149                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2150                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2151                             if format_type  # bv, ba, wv, wa
2152                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2153                             if not format_modified  # b, w
2154                             else lambda f: True)  # b*, w*
2155                         filter_f = lambda f: _filter_f(f) and (
2156                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2157                     else:
2158                         if format_spec in self._format_selection_exts['audio']:
2159                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2160                         elif format_spec in self._format_selection_exts['video']:
2161                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2162                         elif format_spec in self._format_selection_exts['storyboards']:
2163                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2164                         else:
2165                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2166
2167                     def selector_function(ctx):
2168                         formats = list(ctx['formats'])
2169                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2170                         if format_fallback and ctx['incomplete_formats'] and not matches:
2171                             # for extractors with incomplete formats (audio only (soundcloud)
2172                             # or video only (imgur)) best/worst will fallback to
2173                             # best/worst {video,audio}-only format
2174                             matches = formats
2175                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2176                         try:
2177                             yield matches[format_idx - 1]
2178                         except IndexError:
2179                             return
2180
2181             filters = [self._build_format_filter(f) for f in selector.filters]
2182
2183             def final_selector(ctx):
2184                 ctx_copy = dict(ctx)
2185                 for _filter in filters:
2186                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2187                 return selector_function(ctx_copy)
2188             return final_selector
2189
2190         stream = io.BytesIO(format_spec.encode('utf-8'))
2191         try:
2192             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2193         except tokenize.TokenError:
2194             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2195
2196         class TokenIterator(object):
2197             def __init__(self, tokens):
2198                 self.tokens = tokens
2199                 self.counter = 0
2200
2201             def __iter__(self):
2202                 return self
2203
2204             def __next__(self):
2205                 if self.counter >= len(self.tokens):
2206                     raise StopIteration()
2207                 value = self.tokens[self.counter]
2208                 self.counter += 1
2209                 return value
2210
2211             next = __next__
2212
2213             def restore_last_token(self):
2214                 self.counter -= 1
2215
2216         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2217         return _build_selector_function(parsed_selector)
2218
2219     def _calc_headers(self, info_dict):
2220         res = std_headers.copy()
2221
2222         add_headers = info_dict.get('http_headers')
2223         if add_headers:
2224             res.update(add_headers)
2225
2226         cookies = self._calc_cookies(info_dict)
2227         if cookies:
2228             res['Cookie'] = cookies
2229
2230         if 'X-Forwarded-For' not in res:
2231             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2232             if x_forwarded_for_ip:
2233                 res['X-Forwarded-For'] = x_forwarded_for_ip
2234
2235         return res
2236
2237     def _calc_cookies(self, info_dict):
2238         pr = sanitized_Request(info_dict['url'])
2239         self.cookiejar.add_cookie_header(pr)
2240         return pr.get_header('Cookie')
2241
2242     def _sort_thumbnails(self, thumbnails):
2243         thumbnails.sort(key=lambda t: (
2244             t.get('preference') if t.get('preference') is not None else -1,
2245             t.get('width') if t.get('width') is not None else -1,
2246             t.get('height') if t.get('height') is not None else -1,
2247             t.get('id') if t.get('id') is not None else '',
2248             t.get('url')))
2249
2250     def _sanitize_thumbnails(self, info_dict):
2251         thumbnails = info_dict.get('thumbnails')
2252         if thumbnails is None:
2253             thumbnail = info_dict.get('thumbnail')
2254             if thumbnail:
2255                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2256         if not thumbnails:
2257             return
2258
2259         def check_thumbnails(thumbnails):
2260             for t in thumbnails:
2261                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2262                 try:
2263                     self.urlopen(HEADRequest(t['url']))
2264                 except network_exceptions as err:
2265                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2266                     continue
2267                 yield t
2268
2269         self._sort_thumbnails(thumbnails)
2270         for i, t in enumerate(thumbnails):
2271             if t.get('id') is None:
2272                 t['id'] = '%d' % i
2273             if t.get('width') and t.get('height'):
2274                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2275             t['url'] = sanitize_url(t['url'])
2276
2277         if self.params.get('check_formats') is True:
2278             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2279         else:
2280             info_dict['thumbnails'] = thumbnails
2281
2282     def process_video_result(self, info_dict, download=True):
2283         assert info_dict.get('_type', 'video') == 'video'
2284         self._num_videos += 1
2285
2286         if 'id' not in info_dict:
2287             raise ExtractorError('Missing "id" field in extractor result')
2288         if 'title' not in info_dict:
2289             raise ExtractorError('Missing "title" field in extractor result',
2290                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2291
2292         def report_force_conversion(field, field_not, conversion):
2293             self.report_warning(
2294                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2295                 % (field, field_not, conversion))
2296
2297         def sanitize_string_field(info, string_field):
2298             field = info.get(string_field)
2299             if field is None or isinstance(field, compat_str):
2300                 return
2301             report_force_conversion(string_field, 'a string', 'string')
2302             info[string_field] = compat_str(field)
2303
2304         def sanitize_numeric_fields(info):
2305             for numeric_field in self._NUMERIC_FIELDS:
2306                 field = info.get(numeric_field)
2307                 if field is None or isinstance(field, compat_numeric_types):
2308                     continue
2309                 report_force_conversion(numeric_field, 'numeric', 'int')
2310                 info[numeric_field] = int_or_none(field)
2311
2312         sanitize_string_field(info_dict, 'id')
2313         sanitize_numeric_fields(info_dict)
2314
2315         if 'playlist' not in info_dict:
2316             # It isn't part of a playlist
2317             info_dict['playlist'] = None
2318             info_dict['playlist_index'] = None
2319
2320         self._sanitize_thumbnails(info_dict)
2321
2322         thumbnail = info_dict.get('thumbnail')
2323         thumbnails = info_dict.get('thumbnails')
2324         if thumbnail:
2325             info_dict['thumbnail'] = sanitize_url(thumbnail)
2326         elif thumbnails:
2327             info_dict['thumbnail'] = thumbnails[-1]['url']
2328
2329         if info_dict.get('display_id') is None and 'id' in info_dict:
2330             info_dict['display_id'] = info_dict['id']
2331
2332         if info_dict.get('duration') is not None:
2333             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2334
2335         for ts_key, date_key in (
2336                 ('timestamp', 'upload_date'),
2337                 ('release_timestamp', 'release_date'),
2338                 ('modified_timestamp', 'modified_date'),
2339         ):
2340             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2341                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2342                 # see http://bugs.python.org/issue1646728)
2343                 try:
2344                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2345                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2346                 except (ValueError, OverflowError, OSError):
2347                     pass
2348
2349         live_keys = ('is_live', 'was_live')
2350         live_status = info_dict.get('live_status')
2351         if live_status is None:
2352             for key in live_keys:
2353                 if info_dict.get(key) is False:
2354                     continue
2355                 if info_dict.get(key):
2356                     live_status = key
2357                 break
2358             if all(info_dict.get(key) is False for key in live_keys):
2359                 live_status = 'not_live'
2360         if live_status:
2361             info_dict['live_status'] = live_status
2362             for key in live_keys:
2363                 if info_dict.get(key) is None:
2364                     info_dict[key] = (live_status == key)
2365
2366         # Auto generate title fields corresponding to the *_number fields when missing
2367         # in order to always have clean titles. This is very common for TV series.
2368         for field in ('chapter', 'season', 'episode'):
2369             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2370                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2371
2372         for cc_kind in ('subtitles', 'automatic_captions'):
2373             cc = info_dict.get(cc_kind)
2374             if cc:
2375                 for _, subtitle in cc.items():
2376                     for subtitle_format in subtitle:
2377                         if subtitle_format.get('url'):
2378                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2379                         if subtitle_format.get('ext') is None:
2380                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2381
2382         automatic_captions = info_dict.get('automatic_captions')
2383         subtitles = info_dict.get('subtitles')
2384
2385         info_dict['requested_subtitles'] = self.process_subtitles(
2386             info_dict['id'], subtitles, automatic_captions)
2387
2388         if info_dict.get('formats') is None:
2389             # There's only one format available
2390             formats = [info_dict]
2391         else:
2392             formats = info_dict['formats']
2393
2394         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2395         if not self.params.get('allow_unplayable_formats'):
2396             formats = [f for f in formats if not f.get('has_drm')]
2397
2398         # backward compatibility
2399         info_dict['fulltitle'] = info_dict['title']
2400
2401         if info_dict.get('is_live'):
2402             get_from_start = bool(self.params.get('live_from_start'))
2403             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2404             if not get_from_start:
2405                 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2406
2407         if not formats:
2408             self.raise_no_formats(info_dict)
2409
2410         def is_wellformed(f):
2411             url = f.get('url')
2412             if not url:
2413                 self.report_warning(
2414                     '"url" field is missing or empty - skipping format, '
2415                     'there is an error in extractor')
2416                 return False
2417             if isinstance(url, bytes):
2418                 sanitize_string_field(f, 'url')
2419             return True
2420
2421         # Filter out malformed formats for better extraction robustness
2422         formats = list(filter(is_wellformed, formats))
2423
2424         formats_dict = {}
2425
2426         # We check that all the formats have the format and format_id fields
2427         for i, format in enumerate(formats):
2428             sanitize_string_field(format, 'format_id')
2429             sanitize_numeric_fields(format)
2430             format['url'] = sanitize_url(format['url'])
2431             if not format.get('format_id'):
2432                 format['format_id'] = compat_str(i)
2433             else:
2434                 # Sanitize format_id from characters used in format selector expression
2435                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2436             format_id = format['format_id']
2437             if format_id not in formats_dict:
2438                 formats_dict[format_id] = []
2439             formats_dict[format_id].append(format)
2440
2441         # Make sure all formats have unique format_id
2442         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2443         for format_id, ambiguous_formats in formats_dict.items():
2444             ambigious_id = len(ambiguous_formats) > 1
2445             for i, format in enumerate(ambiguous_formats):
2446                 if ambigious_id:
2447                     format['format_id'] = '%s-%d' % (format_id, i)
2448                 if format.get('ext') is None:
2449                     format['ext'] = determine_ext(format['url']).lower()
2450                 # Ensure there is no conflict between id and ext in format selection
2451                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2452                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2453                     format['format_id'] = 'f%s' % format['format_id']
2454
2455         for i, format in enumerate(formats):
2456             if format.get('format') is None:
2457                 format['format'] = '{id} - {res}{note}'.format(
2458                     id=format['format_id'],
2459                     res=self.format_resolution(format),
2460                     note=format_field(format, 'format_note', ' (%s)'),
2461                 )
2462             if format.get('protocol') is None:
2463                 format['protocol'] = determine_protocol(format)
2464             if format.get('resolution') is None:
2465                 format['resolution'] = self.format_resolution(format, default=None)
2466             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2467                 format['dynamic_range'] = 'SDR'
2468             if (info_dict.get('duration') and format.get('tbr')
2469                     and not format.get('filesize') and not format.get('filesize_approx')):
2470                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2471
2472             # Add HTTP headers, so that external programs can use them from the
2473             # json output
2474             full_format_info = info_dict.copy()
2475             full_format_info.update(format)
2476             format['http_headers'] = self._calc_headers(full_format_info)
2477         # Remove private housekeeping stuff
2478         if '__x_forwarded_for_ip' in info_dict:
2479             del info_dict['__x_forwarded_for_ip']
2480
2481         # TODO Central sorting goes here
2482
2483         if self.params.get('check_formats') is True:
2484             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2485
2486         if not formats or formats[0] is not info_dict:
2487             # only set the 'formats' fields if the original info_dict list them
2488             # otherwise we end up with a circular reference, the first (and unique)
2489             # element in the 'formats' field in info_dict is info_dict itself,
2490             # which can't be exported to json
2491             info_dict['formats'] = formats
2492
2493         info_dict, _ = self.pre_process(info_dict)
2494
2495         # The pre-processors may have modified the formats
2496         formats = info_dict.get('formats', [info_dict])
2497
2498         list_only = self.params.get('simulate') is None and (
2499             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2500         interactive_format_selection = not list_only and self.format_selector == '-'
2501         if self.params.get('list_thumbnails'):
2502             self.list_thumbnails(info_dict)
2503         if self.params.get('listsubtitles'):
2504             if 'automatic_captions' in info_dict:
2505                 self.list_subtitles(
2506                     info_dict['id'], automatic_captions, 'automatic captions')
2507             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2508         if self.params.get('listformats') or interactive_format_selection:
2509             self.list_formats(info_dict)
2510         if list_only:
2511             # Without this printing, -F --print-json will not work
2512             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2513             return
2514
2515         format_selector = self.format_selector
2516         if format_selector is None:
2517             req_format = self._default_format_spec(info_dict, download=download)
2518             self.write_debug('Default format spec: %s' % req_format)
2519             format_selector = self.build_format_selector(req_format)
2520
2521         while True:
2522             if interactive_format_selection:
2523                 req_format = input(
2524                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2525                 try:
2526                     format_selector = self.build_format_selector(req_format)
2527                 except SyntaxError as err:
2528                     self.report_error(err, tb=False, is_error=False)
2529                     continue
2530
2531             # While in format selection we may need to have an access to the original
2532             # format set in order to calculate some metrics or do some processing.
2533             # For now we need to be able to guess whether original formats provided
2534             # by extractor are incomplete or not (i.e. whether extractor provides only
2535             # video-only or audio-only formats) for proper formats selection for
2536             # extractors with such incomplete formats (see
2537             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2538             # Since formats may be filtered during format selection and may not match
2539             # the original formats the results may be incorrect. Thus original formats
2540             # or pre-calculated metrics should be passed to format selection routines
2541             # as well.
2542             # We will pass a context object containing all necessary additional data
2543             # instead of just formats.
2544             # This fixes incorrect format selection issue (see
2545             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2546             incomplete_formats = (
2547                 # All formats are video-only or
2548                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2549                 # all formats are audio-only
2550                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2551
2552             ctx = {
2553                 'formats': formats,
2554                 'incomplete_formats': incomplete_formats,
2555             }
2556
2557             formats_to_download = list(format_selector(ctx))
2558             if interactive_format_selection and not formats_to_download:
2559                 self.report_error('Requested format is not available', tb=False, is_error=False)
2560                 continue
2561             break
2562
2563         if not formats_to_download:
2564             if not self.params.get('ignore_no_formats_error'):
2565                 raise ExtractorError('Requested format is not available', expected=True,
2566                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2567             self.report_warning('Requested format is not available')
2568             # Process what we can, even without any available formats.
2569             formats_to_download = [{}]
2570
2571         best_format = formats_to_download[-1]
2572         if download:
2573             if best_format:
2574                 self.to_screen(
2575                     f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2576                     + ', '.join([f['format_id'] for f in formats_to_download]))
2577             max_downloads_reached = False
2578             for i, fmt in enumerate(formats_to_download):
2579                 formats_to_download[i] = new_info = dict(info_dict)
2580                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2581                 new_info.update(fmt)
2582                 new_info['__original_infodict'] = info_dict
2583                 try:
2584                     self.process_info(new_info)
2585                 except MaxDownloadsReached:
2586                     max_downloads_reached = True
2587                 new_info.pop('__original_infodict')
2588                 # Remove copied info
2589                 for key, val in tuple(new_info.items()):
2590                     if info_dict.get(key) == val:
2591                         new_info.pop(key)
2592                 if max_downloads_reached:
2593                     break
2594
2595             write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2596             assert write_archive.issubset({True, False, 'ignore'})
2597             if True in write_archive and False not in write_archive:
2598                 self.record_download_archive(info_dict)
2599
2600             info_dict['requested_downloads'] = formats_to_download
2601             info_dict = self.run_all_pps('after_video', info_dict)
2602             if max_downloads_reached:
2603                 raise MaxDownloadsReached()
2604
2605         # We update the info dict with the selected best quality format (backwards compatibility)
2606         info_dict.update(best_format)
2607         return info_dict
2608
2609     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2610         """Select the requested subtitles and their format"""
2611         available_subs = {}
2612         if normal_subtitles and self.params.get('writesubtitles'):
2613             available_subs.update(normal_subtitles)
2614         if automatic_captions and self.params.get('writeautomaticsub'):
2615             for lang, cap_info in automatic_captions.items():
2616                 if lang not in available_subs:
2617                     available_subs[lang] = cap_info
2618
2619         if (not self.params.get('writesubtitles') and not
2620                 self.params.get('writeautomaticsub') or not
2621                 available_subs):
2622             return None
2623
2624         all_sub_langs = available_subs.keys()
2625         if self.params.get('allsubtitles', False):
2626             requested_langs = all_sub_langs
2627         elif self.params.get('subtitleslangs', False):
2628             # A list is used so that the order of languages will be the same as
2629             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2630             requested_langs = []
2631             for lang_re in self.params.get('subtitleslangs'):
2632                 if lang_re == 'all':
2633                     requested_langs.extend(all_sub_langs)
2634                     continue
2635                 discard = lang_re[0] == '-'
2636                 if discard:
2637                     lang_re = lang_re[1:]
2638                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2639                 if discard:
2640                     for lang in current_langs:
2641                         while lang in requested_langs:
2642                             requested_langs.remove(lang)
2643                 else:
2644                     requested_langs.extend(current_langs)
2645             requested_langs = orderedSet(requested_langs)
2646         elif 'en' in available_subs:
2647             requested_langs = ['en']
2648         else:
2649             requested_langs = [list(all_sub_langs)[0]]
2650         if requested_langs:
2651             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2652
2653         formats_query = self.params.get('subtitlesformat', 'best')
2654         formats_preference = formats_query.split('/') if formats_query else []
2655         subs = {}
2656         for lang in requested_langs:
2657             formats = available_subs.get(lang)
2658             if formats is None:
2659                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2660                 continue
2661             for ext in formats_preference:
2662                 if ext == 'best':
2663                     f = formats[-1]
2664                     break
2665                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2666                 if matches:
2667                     f = matches[-1]
2668                     break
2669             else:
2670                 f = formats[-1]
2671                 self.report_warning(
2672                     'No subtitle format found matching "%s" for language %s, '
2673                     'using %s' % (formats_query, lang, f['ext']))
2674             subs[lang] = f
2675         return subs
2676
2677     def _forceprint(self, tmpl, info_dict):
2678         mobj = re.match(r'\w+(=?)$', tmpl)
2679         if mobj and mobj.group(1):
2680             tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2681         elif mobj:
2682             tmpl = '%({})s'.format(tmpl)
2683
2684         info_dict = info_dict.copy()
2685         info_dict['formats_table'] = self.render_formats_table(info_dict)
2686         info_dict['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2687         info_dict['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2688         info_dict['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2689         self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2690
2691     def __forced_printings(self, info_dict, filename, incomplete):
2692         def print_mandatory(field, actual_field=None):
2693             if actual_field is None:
2694                 actual_field = field
2695             if (self.params.get('force%s' % field, False)
2696                     and (not incomplete or info_dict.get(actual_field) is not None)):
2697                 self.to_stdout(info_dict[actual_field])
2698
2699         def print_optional(field):
2700             if (self.params.get('force%s' % field, False)
2701                     and info_dict.get(field) is not None):
2702                 self.to_stdout(info_dict[field])
2703
2704         info_dict = info_dict.copy()
2705         if filename is not None:
2706             info_dict['filename'] = filename
2707         if info_dict.get('requested_formats') is not None:
2708             # For RTMP URLs, also include the playpath
2709             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2710         elif 'url' in info_dict:
2711             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2712
2713         if self.params['forceprint'].get('video') or self.params.get('forcejson'):
2714             self.post_extract(info_dict)
2715         for tmpl in self.params['forceprint'].get('video', []):
2716             self._forceprint(tmpl, info_dict)
2717
2718         print_mandatory('title')
2719         print_mandatory('id')
2720         print_mandatory('url', 'urls')
2721         print_optional('thumbnail')
2722         print_optional('description')
2723         print_optional('filename')
2724         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2725             self.to_stdout(formatSeconds(info_dict['duration']))
2726         print_mandatory('format')
2727
2728         if self.params.get('forcejson'):
2729             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2730
2731     def dl(self, name, info, subtitle=False, test=False):
2732         if not info.get('url'):
2733             self.raise_no_formats(info, True)
2734
2735         if test:
2736             verbose = self.params.get('verbose')
2737             params = {
2738                 'test': True,
2739                 'quiet': self.params.get('quiet') or not verbose,
2740                 'verbose': verbose,
2741                 'noprogress': not verbose,
2742                 'nopart': True,
2743                 'skip_unavailable_fragments': False,
2744                 'keep_fragments': False,
2745                 'overwrites': True,
2746                 '_no_ytdl_file': True,
2747             }
2748         else:
2749             params = self.params
2750         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2751         if not test:
2752             for ph in self._progress_hooks:
2753                 fd.add_progress_hook(ph)
2754             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2755             self.write_debug('Invoking downloader on "%s"' % urls)
2756
2757         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2758         # But it may contain objects that are not deep-copyable
2759         new_info = self._copy_infodict(info)
2760         if new_info.get('http_headers') is None:
2761             new_info['http_headers'] = self._calc_headers(new_info)
2762         return fd.download(name, new_info, subtitle)
2763
2764     def existing_file(self, filepaths, *, default_overwrite=True):
2765         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2766         if existing_files and not self.params.get('overwrites', default_overwrite):
2767             return existing_files[0]
2768
2769         for file in existing_files:
2770             self.report_file_delete(file)
2771             os.remove(file)
2772         return None
2773
2774     def process_info(self, info_dict):
2775         """Process a single resolved IE result. (Modified it in-place)"""
2776
2777         assert info_dict.get('_type', 'video') == 'video'
2778         original_infodict = info_dict
2779
2780         if 'format' not in info_dict and 'ext' in info_dict:
2781             info_dict['format'] = info_dict['ext']
2782
2783         if self._match_entry(info_dict) is not None:
2784             info_dict['__write_download_archive'] = 'ignore'
2785             return
2786
2787         self.post_extract(info_dict)
2788         self._num_downloads += 1
2789
2790         # info_dict['_filename'] needs to be set for backward compatibility
2791         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2792         temp_filename = self.prepare_filename(info_dict, 'temp')
2793         files_to_move = {}
2794
2795         # Forced printings
2796         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2797
2798         if self.params.get('simulate'):
2799             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2800             return
2801
2802         if full_filename is None:
2803             return
2804         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2805             return
2806         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2807             return
2808
2809         if self._write_description('video', info_dict,
2810                                    self.prepare_filename(info_dict, 'description')) is None:
2811             return
2812
2813         sub_files = self._write_subtitles(info_dict, temp_filename)
2814         if sub_files is None:
2815             return
2816         files_to_move.update(dict(sub_files))
2817
2818         thumb_files = self._write_thumbnails(
2819             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2820         if thumb_files is None:
2821             return
2822         files_to_move.update(dict(thumb_files))
2823
2824         infofn = self.prepare_filename(info_dict, 'infojson')
2825         _infojson_written = self._write_info_json('video', info_dict, infofn)
2826         if _infojson_written:
2827             info_dict['infojson_filename'] = infofn
2828             # For backward compatibility, even though it was a private field
2829             info_dict['__infojson_filename'] = infofn
2830         elif _infojson_written is None:
2831             return
2832
2833         # Note: Annotations are deprecated
2834         annofn = None
2835         if self.params.get('writeannotations', False):
2836             annofn = self.prepare_filename(info_dict, 'annotation')
2837         if annofn:
2838             if not self._ensure_dir_exists(encodeFilename(annofn)):
2839                 return
2840             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2841                 self.to_screen('[info] Video annotations are already present')
2842             elif not info_dict.get('annotations'):
2843                 self.report_warning('There are no annotations to write.')
2844             else:
2845                 try:
2846                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2847                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2848                         annofile.write(info_dict['annotations'])
2849                 except (KeyError, TypeError):
2850                     self.report_warning('There are no annotations to write.')
2851                 except (OSError, IOError):
2852                     self.report_error('Cannot write annotations file: ' + annofn)
2853                     return
2854
2855         # Write internet shortcut files
2856         def _write_link_file(link_type):
2857             if 'webpage_url' not in info_dict:
2858                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2859                 return False
2860             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2861             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2862                 return False
2863             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2864                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2865                 return True
2866             try:
2867                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2868                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2869                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2870                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2871                     if link_type == 'desktop':
2872                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2873                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2874             except (OSError, IOError):
2875                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2876                 return False
2877             return True
2878
2879         write_links = {
2880             'url': self.params.get('writeurllink'),
2881             'webloc': self.params.get('writewebloclink'),
2882             'desktop': self.params.get('writedesktoplink'),
2883         }
2884         if self.params.get('writelink'):
2885             link_type = ('webloc' if sys.platform == 'darwin'
2886                          else 'desktop' if sys.platform.startswith('linux')
2887                          else 'url')
2888             write_links[link_type] = True
2889
2890         if any(should_write and not _write_link_file(link_type)
2891                for link_type, should_write in write_links.items()):
2892             return
2893
2894         def replace_info_dict(new_info):
2895             nonlocal info_dict
2896             if new_info == info_dict:
2897                 return
2898             info_dict.clear()
2899             info_dict.update(new_info)
2900
2901         try:
2902             new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2903             replace_info_dict(new_info)
2904         except PostProcessingError as err:
2905             self.report_error('Preprocessing: %s' % str(err))
2906             return
2907
2908         if self.params.get('skip_download'):
2909             info_dict['filepath'] = temp_filename
2910             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2911             info_dict['__files_to_move'] = files_to_move
2912             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2913             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2914         else:
2915             # Download
2916             info_dict.setdefault('__postprocessors', [])
2917             try:
2918
2919                 def existing_video_file(*filepaths):
2920                     ext = info_dict.get('ext')
2921                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
2922                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
2923                                               default_overwrite=False)
2924                     if file:
2925                         info_dict['ext'] = os.path.splitext(file)[1][1:]
2926                     return file
2927
2928                 success = True
2929                 if info_dict.get('requested_formats') is not None:
2930
2931                     def compatible_formats(formats):
2932                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2933                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2934                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2935                         if len(video_formats) > 2 or len(audio_formats) > 2:
2936                             return False
2937
2938                         # Check extension
2939                         exts = set(format.get('ext') for format in formats)
2940                         COMPATIBLE_EXTS = (
2941                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2942                             set(('webm',)),
2943                         )
2944                         for ext_sets in COMPATIBLE_EXTS:
2945                             if ext_sets.issuperset(exts):
2946                                 return True
2947                         # TODO: Check acodec/vcodec
2948                         return False
2949
2950                     requested_formats = info_dict['requested_formats']
2951                     old_ext = info_dict['ext']
2952                     if self.params.get('merge_output_format') is None:
2953                         if not compatible_formats(requested_formats):
2954                             info_dict['ext'] = 'mkv'
2955                             self.report_warning(
2956                                 'Requested formats are incompatible for merge and will be merged into mkv')
2957                         if (info_dict['ext'] == 'webm'
2958                                 and info_dict.get('thumbnails')
2959                                 # check with type instead of pp_key, __name__, or isinstance
2960                                 # since we dont want any custom PPs to trigger this
2961                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2962                             info_dict['ext'] = 'mkv'
2963                             self.report_warning(
2964                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2965                     new_ext = info_dict['ext']
2966
2967                     def correct_ext(filename, ext=new_ext):
2968                         if filename == '-':
2969                             return filename
2970                         filename_real_ext = os.path.splitext(filename)[1][1:]
2971                         filename_wo_ext = (
2972                             os.path.splitext(filename)[0]
2973                             if filename_real_ext in (old_ext, new_ext)
2974                             else filename)
2975                         return '%s.%s' % (filename_wo_ext, ext)
2976
2977                     # Ensure filename always has a correct extension for successful merge
2978                     full_filename = correct_ext(full_filename)
2979                     temp_filename = correct_ext(temp_filename)
2980                     dl_filename = existing_video_file(full_filename, temp_filename)
2981                     info_dict['__real_download'] = False
2982
2983                     downloaded = []
2984                     merger = FFmpegMergerPP(self)
2985
2986                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
2987                     if dl_filename is not None:
2988                         self.report_file_already_downloaded(dl_filename)
2989                     elif fd:
2990                         for f in requested_formats if fd != FFmpegFD else []:
2991                             f['filepath'] = fname = prepend_extension(
2992                                 correct_ext(temp_filename, info_dict['ext']),
2993                                 'f%s' % f['format_id'], info_dict['ext'])
2994                             downloaded.append(fname)
2995                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2996                         success, real_download = self.dl(temp_filename, info_dict)
2997                         info_dict['__real_download'] = real_download
2998                     else:
2999                         if self.params.get('allow_unplayable_formats'):
3000                             self.report_warning(
3001                                 'You have requested merging of multiple formats '
3002                                 'while also allowing unplayable formats to be downloaded. '
3003                                 'The formats won\'t be merged to prevent data corruption.')
3004                         elif not merger.available:
3005                             self.report_warning(
3006                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
3007                                 'The formats won\'t be merged.')
3008
3009                         if temp_filename == '-':
3010                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3011                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3012                                       else 'but ffmpeg is not installed')
3013                             self.report_warning(
3014                                 f'You have requested downloading multiple formats to stdout {reason}. '
3015                                 'The formats will be streamed one after the other')
3016                             fname = temp_filename
3017                         for f in requested_formats:
3018                             new_info = dict(info_dict)
3019                             del new_info['requested_formats']
3020                             new_info.update(f)
3021                             if temp_filename != '-':
3022                                 fname = prepend_extension(
3023                                     correct_ext(temp_filename, new_info['ext']),
3024                                     'f%s' % f['format_id'], new_info['ext'])
3025                                 if not self._ensure_dir_exists(fname):
3026                                     return
3027                                 f['filepath'] = fname
3028                                 downloaded.append(fname)
3029                             partial_success, real_download = self.dl(fname, new_info)
3030                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3031                             success = success and partial_success
3032
3033                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3034                         info_dict['__postprocessors'].append(merger)
3035                         info_dict['__files_to_merge'] = downloaded
3036                         # Even if there were no downloads, it is being merged only now
3037                         info_dict['__real_download'] = True
3038                     else:
3039                         for file in downloaded:
3040                             files_to_move[file] = None
3041                 else:
3042                     # Just a single file
3043                     dl_filename = existing_video_file(full_filename, temp_filename)
3044                     if dl_filename is None or dl_filename == temp_filename:
3045                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3046                         # So we should try to resume the download
3047                         success, real_download = self.dl(temp_filename, info_dict)
3048                         info_dict['__real_download'] = real_download
3049                     else:
3050                         self.report_file_already_downloaded(dl_filename)
3051
3052                 dl_filename = dl_filename or temp_filename
3053                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3054
3055             except network_exceptions as err:
3056                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3057                 return
3058             except (OSError, IOError) as err:
3059                 raise UnavailableVideoError(err)
3060             except (ContentTooShortError, ) as err:
3061                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3062                 return
3063
3064             if success and full_filename != '-':
3065
3066                 def fixup():
3067                     do_fixup = True
3068                     fixup_policy = self.params.get('fixup')
3069                     vid = info_dict['id']
3070
3071                     if fixup_policy in ('ignore', 'never'):
3072                         return
3073                     elif fixup_policy == 'warn':
3074                         do_fixup = False
3075                     elif fixup_policy != 'force':
3076                         assert fixup_policy in ('detect_or_warn', None)
3077                         if not info_dict.get('__real_download'):
3078                             do_fixup = False
3079
3080                     def ffmpeg_fixup(cndn, msg, cls):
3081                         if not cndn:
3082                             return
3083                         if not do_fixup:
3084                             self.report_warning(f'{vid}: {msg}')
3085                             return
3086                         pp = cls(self)
3087                         if pp.available:
3088                             info_dict['__postprocessors'].append(pp)
3089                         else:
3090                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3091
3092                     stretched_ratio = info_dict.get('stretched_ratio')
3093                     ffmpeg_fixup(
3094                         stretched_ratio not in (1, None),
3095                         f'Non-uniform pixel ratio {stretched_ratio}',
3096                         FFmpegFixupStretchedPP)
3097
3098                     ffmpeg_fixup(
3099                         (info_dict.get('requested_formats') is None
3100                          and info_dict.get('container') == 'm4a_dash'
3101                          and info_dict.get('ext') == 'm4a'),
3102                         'writing DASH m4a. Only some players support this container',
3103                         FFmpegFixupM4aPP)
3104
3105                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3106                     downloader = downloader.__name__ if downloader else None
3107
3108                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3109                         ffmpeg_fixup(downloader == 'HlsFD',
3110                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3111                                      FFmpegFixupM3u8PP)
3112                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3113                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3114
3115                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3116                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3117
3118                 fixup()
3119                 try:
3120                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3121                 except PostProcessingError as err:
3122                     self.report_error('Postprocessing: %s' % str(err))
3123                     return
3124                 try:
3125                     for ph in self._post_hooks:
3126                         ph(info_dict['filepath'])
3127                 except Exception as err:
3128                     self.report_error('post hooks: %s' % str(err))
3129                     return
3130                 info_dict['__write_download_archive'] = True
3131
3132         if self.params.get('force_write_download_archive'):
3133             info_dict['__write_download_archive'] = True
3134
3135         # Make sure the info_dict was modified in-place
3136         assert info_dict is original_infodict
3137
3138         max_downloads = self.params.get('max_downloads')
3139         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3140             raise MaxDownloadsReached()
3141
3142     def __download_wrapper(self, func):
3143         @functools.wraps(func)
3144         def wrapper(*args, **kwargs):
3145             try:
3146                 res = func(*args, **kwargs)
3147             except UnavailableVideoError as e:
3148                 self.report_error(e)
3149             except MaxDownloadsReached as e:
3150                 self.to_screen(f'[info] {e}')
3151                 raise
3152             except DownloadCancelled as e:
3153                 self.to_screen(f'[info] {e}')
3154                 if not self.params.get('break_per_url'):
3155                     raise
3156             else:
3157                 if self.params.get('dump_single_json', False):
3158                     self.post_extract(res)
3159                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3160         return wrapper
3161
3162     def download(self, url_list):
3163         """Download a given list of URLs."""
3164         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3165         outtmpl = self.outtmpl_dict['default']
3166         if (len(url_list) > 1
3167                 and outtmpl != '-'
3168                 and '%' not in outtmpl
3169                 and self.params.get('max_downloads') != 1):
3170             raise SameFileError(outtmpl)
3171
3172         for url in url_list:
3173             self.__download_wrapper(self.extract_info)(
3174                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3175
3176         return self._download_retcode
3177
3178     def download_with_info_file(self, info_filename):
3179         with contextlib.closing(fileinput.FileInput(
3180                 [info_filename], mode='r',
3181                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3182             # FileInput doesn't have a read method, we can't call json.load
3183             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3184         try:
3185             self.__download_wrapper(self.process_ie_result)(info, download=True)
3186         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3187             if not isinstance(e, EntryNotInPlaylist):
3188                 self.to_stderr('\r')
3189             webpage_url = info.get('webpage_url')
3190             if webpage_url is not None:
3191                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3192                 return self.download([webpage_url])
3193             else:
3194                 raise
3195         return self._download_retcode
3196
3197     @staticmethod
3198     def sanitize_info(info_dict, remove_private_keys=False):
3199         ''' Sanitize the infodict for converting to json '''
3200         if info_dict is None:
3201             return info_dict
3202         info_dict.setdefault('epoch', int(time.time()))
3203         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3204         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3205         if remove_private_keys:
3206             remove_keys |= {
3207                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3208                 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3209             }
3210             reject = lambda k, v: k not in keep_keys and (
3211                 k.startswith('_') or k in remove_keys or v is None)
3212         else:
3213             reject = lambda k, v: k in remove_keys
3214
3215         def filter_fn(obj):
3216             if isinstance(obj, dict):
3217                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3218             elif isinstance(obj, (list, tuple, set, LazyList)):
3219                 return list(map(filter_fn, obj))
3220             elif obj is None or isinstance(obj, (str, int, float, bool)):
3221                 return obj
3222             else:
3223                 return repr(obj)
3224
3225         return filter_fn(info_dict)
3226
3227     @staticmethod
3228     def filter_requested_info(info_dict, actually_filter=True):
3229         ''' Alias of sanitize_info for backward compatibility '''
3230         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3231
3232     @staticmethod
3233     def post_extract(info_dict):
3234         def actual_post_extract(info_dict):
3235             if info_dict.get('_type') in ('playlist', 'multi_video'):
3236                 for video_dict in info_dict.get('entries', {}):
3237                     actual_post_extract(video_dict or {})
3238                 return
3239
3240             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3241             extra = post_extractor().items()
3242             info_dict.update(extra)
3243             info_dict.pop('__post_extractor', None)
3244
3245             original_infodict = info_dict.get('__original_infodict') or {}
3246             original_infodict.update(extra)
3247             original_infodict.pop('__post_extractor', None)
3248
3249         actual_post_extract(info_dict or {})
3250
3251     def run_pp(self, pp, infodict):
3252         files_to_delete = []
3253         if '__files_to_move' not in infodict:
3254             infodict['__files_to_move'] = {}
3255         try:
3256             files_to_delete, infodict = pp.run(infodict)
3257         except PostProcessingError as e:
3258             # Must be True and not 'only_download'
3259             if self.params.get('ignoreerrors') is True:
3260                 self.report_error(e)
3261                 return infodict
3262             raise
3263
3264         if not files_to_delete:
3265             return infodict
3266         if self.params.get('keepvideo', False):
3267             for f in files_to_delete:
3268                 infodict['__files_to_move'].setdefault(f, '')
3269         else:
3270             for old_filename in set(files_to_delete):
3271                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3272                 try:
3273                     os.remove(encodeFilename(old_filename))
3274                 except (IOError, OSError):
3275                     self.report_warning('Unable to remove downloaded original file')
3276                 if old_filename in infodict['__files_to_move']:
3277                     del infodict['__files_to_move'][old_filename]
3278         return infodict
3279
3280     def run_all_pps(self, key, info, *, additional_pps=None):
3281         for tmpl in self.params['forceprint'].get(key, []):
3282             self._forceprint(tmpl, info)
3283         for pp in (additional_pps or []) + self._pps[key]:
3284             info = self.run_pp(pp, info)
3285         return info
3286
3287     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3288         info = dict(ie_info)
3289         info['__files_to_move'] = files_to_move or {}
3290         info = self.run_all_pps(key, info)
3291         return info, info.pop('__files_to_move', None)
3292
3293     def post_process(self, filename, info, files_to_move=None):
3294         """Run all the postprocessors on the given file."""
3295         info['filepath'] = filename
3296         info['__files_to_move'] = files_to_move or {}
3297         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3298         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3299         del info['__files_to_move']
3300         return self.run_all_pps('after_move', info)
3301
3302     def _make_archive_id(self, info_dict):
3303         video_id = info_dict.get('id')
3304         if not video_id:
3305             return
3306         # Future-proof against any change in case
3307         # and backwards compatibility with prior versions
3308         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3309         if extractor is None:
3310             url = str_or_none(info_dict.get('url'))
3311             if not url:
3312                 return
3313             # Try to find matching extractor for the URL and take its ie_key
3314             for ie_key, ie in self._ies.items():
3315                 if ie.suitable(url):
3316                     extractor = ie_key
3317                     break
3318             else:
3319                 return
3320         return '%s %s' % (extractor.lower(), video_id)
3321
3322     def in_download_archive(self, info_dict):
3323         fn = self.params.get('download_archive')
3324         if fn is None:
3325             return False
3326
3327         vid_id = self._make_archive_id(info_dict)
3328         if not vid_id:
3329             return False  # Incomplete video information
3330
3331         return vid_id in self.archive
3332
3333     def record_download_archive(self, info_dict):
3334         fn = self.params.get('download_archive')
3335         if fn is None:
3336             return
3337         vid_id = self._make_archive_id(info_dict)
3338         assert vid_id
3339         self.write_debug(f'Adding to archive: {vid_id}')
3340         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3341             archive_file.write(vid_id + '\n')
3342         self.archive.add(vid_id)
3343
3344     @staticmethod
3345     def format_resolution(format, default='unknown'):
3346         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3347             return 'audio only'
3348         if format.get('resolution') is not None:
3349             return format['resolution']
3350         if format.get('width') and format.get('height'):
3351             return '%dx%d' % (format['width'], format['height'])
3352         elif format.get('height'):
3353             return '%sp' % format['height']
3354         elif format.get('width'):
3355             return '%dx?' % format['width']
3356         return default
3357
3358     def _list_format_headers(self, *headers):
3359         if self.params.get('listformats_table', True) is not False:
3360             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3361         return headers
3362
3363     def _format_note(self, fdict):
3364         res = ''
3365         if fdict.get('ext') in ['f4f', 'f4m']:
3366             res += '(unsupported)'
3367         if fdict.get('language'):
3368             if res:
3369                 res += ' '
3370             res += '[%s]' % fdict['language']
3371         if fdict.get('format_note') is not None:
3372             if res:
3373                 res += ' '
3374             res += fdict['format_note']
3375         if fdict.get('tbr') is not None:
3376             if res:
3377                 res += ', '
3378             res += '%4dk' % fdict['tbr']
3379         if fdict.get('container') is not None:
3380             if res:
3381                 res += ', '
3382             res += '%s container' % fdict['container']
3383         if (fdict.get('vcodec') is not None
3384                 and fdict.get('vcodec') != 'none'):
3385             if res:
3386                 res += ', '
3387             res += fdict['vcodec']
3388             if fdict.get('vbr') is not None:
3389                 res += '@'
3390         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3391             res += 'video@'
3392         if fdict.get('vbr') is not None:
3393             res += '%4dk' % fdict['vbr']
3394         if fdict.get('fps') is not None:
3395             if res:
3396                 res += ', '
3397             res += '%sfps' % fdict['fps']
3398         if fdict.get('acodec') is not None:
3399             if res:
3400                 res += ', '
3401             if fdict['acodec'] == 'none':
3402                 res += 'video only'
3403             else:
3404                 res += '%-5s' % fdict['acodec']
3405         elif fdict.get('abr') is not None:
3406             if res:
3407                 res += ', '
3408             res += 'audio'
3409         if fdict.get('abr') is not None:
3410             res += '@%3dk' % fdict['abr']
3411         if fdict.get('asr') is not None:
3412             res += ' (%5dHz)' % fdict['asr']
3413         if fdict.get('filesize') is not None:
3414             if res:
3415                 res += ', '
3416             res += format_bytes(fdict['filesize'])
3417         elif fdict.get('filesize_approx') is not None:
3418             if res:
3419                 res += ', '
3420             res += '~' + format_bytes(fdict['filesize_approx'])
3421         return res
3422
3423     def render_formats_table(self, info_dict):
3424         if not info_dict.get('formats') and not info_dict.get('url'):
3425             return None
3426
3427         formats = info_dict.get('formats', [info_dict])
3428         if not self.params.get('listformats_table', True) is not False:
3429             table = [
3430                 [
3431                     format_field(f, 'format_id'),
3432                     format_field(f, 'ext'),
3433                     self.format_resolution(f),
3434                     self._format_note(f)
3435                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3436             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3437
3438         delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3439         table = [
3440             [
3441                 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3442                 format_field(f, 'ext'),
3443                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3444                 format_field(f, 'fps', '\t%d'),
3445                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3446                 delim,
3447                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3448                 format_field(f, 'tbr', '\t%dk'),
3449                 shorten_protocol_name(f.get('protocol', '')),
3450                 delim,
3451                 format_field(f, 'vcodec', default='unknown').replace(
3452                     'none', 'images' if f.get('acodec') == 'none'
3453                             else self._format_screen('audio only', self.Styles.SUPPRESS)),
3454                 format_field(f, 'vbr', '\t%dk'),
3455                 format_field(f, 'acodec', default='unknown').replace(
3456                     'none', '' if f.get('vcodec') == 'none'
3457                             else self._format_screen('video only', self.Styles.SUPPRESS)),
3458                 format_field(f, 'abr', '\t%dk'),
3459                 format_field(f, 'asr', '\t%dHz'),
3460                 join_nonempty(
3461                     self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3462                     format_field(f, 'language', '[%s]'),
3463                     join_nonempty(format_field(f, 'format_note'),
3464                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3465                                   delim=', '),
3466                     delim=' '),
3467             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3468         header_line = self._list_format_headers(
3469             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3470             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3471
3472         return render_table(
3473             header_line, table, hide_empty=True,
3474             delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3475
3476     def render_thumbnails_table(self, info_dict):
3477         thumbnails = list(info_dict.get('thumbnails'))
3478         if not thumbnails:
3479             return None
3480         return render_table(
3481             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3482             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3483
3484     def render_subtitles_table(self, video_id, subtitles):
3485         def _row(lang, formats):
3486             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3487             if len(set(names)) == 1:
3488                 names = [] if names[0] == 'unknown' else names[:1]
3489             return [lang, ', '.join(names), ', '.join(exts)]
3490
3491         if not subtitles:
3492             return None
3493         return render_table(
3494             self._list_format_headers('Language', 'Name', 'Formats'),
3495             [_row(lang, formats) for lang, formats in subtitles.items()],
3496             hide_empty=True)
3497
3498     def __list_table(self, video_id, name, func, *args):
3499         table = func(*args)
3500         if not table:
3501             self.to_screen(f'{video_id} has no {name}')
3502             return
3503         self.to_screen(f'[info] Available {name} for {video_id}:')
3504         self.to_stdout(table)
3505
3506     def list_formats(self, info_dict):
3507         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3508
3509     def list_thumbnails(self, info_dict):
3510         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3511
3512     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3513         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3514
3515     def urlopen(self, req):
3516         """ Start an HTTP download """
3517         if isinstance(req, compat_basestring):
3518             req = sanitized_Request(req)
3519         return self._opener.open(req, timeout=self._socket_timeout)
3520
    def print_debug_header(self):
        """Write the '[debug]' header lines describing the runtime environment.

        Does nothing unless the 'verbose' param is truthy. The lines cover the
        encodings in use, the program version, lazy-loading and plugin state,
        compatibility options, the git HEAD (for source checkouts), the Python
        version, external executable versions, optional libraries, and the
        proxy map.
        """
        if not self.params.get('verbose'):
            return

        def get_encoding(stream):
            # Describe the stream's encoding, or the stream's type name if it has no such attribute
            ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
            if not supports_terminal_sequences(stream):
                from .compat import WINDOWS_VT_MODE
                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
            return ret

        encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            get_encoding(self._screen_file), get_encoding(self._err_file),
            self.get_encoding())

        # Debug lines go to the user-supplied logger when there is one;
        # otherwise they are written through self._write_string
        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            # The encodings line is written with encoding=None, unlike the later lines
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        write_debug(join_nonempty(
            'yt-dlp version', __version__,
            f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            delim=' '))
        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if plugin_extractors or plugin_postprocessors:
            write_debug('Plugins: %s' % [
                '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
        if self.params.get('compat_opts'):
            write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

        # When running from source, ask git (run in this file's directory) for the
        # current HEAD; any failure here is deliberately ignored
        if source == 'source':
            try:
                sp = Popen(
                    ['git', 'rev-parse', '--short', 'HEAD'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                    cwd=os.path.dirname(os.path.abspath(__file__)))
                out, err = sp.communicate_or_kill()
                out = out.decode().strip()
                if re.match('[0-9a-f]+', out):
                    write_debug('Git HEAD: %s' % out)
            except Exception:
                # NOTE(review): sys.exc_clear() is a Python 2 API; on Python 3 the
                # call itself fails and is swallowed by the inner handler
                try:
                    sys.exc_clear()
                except Exception:
                    pass

        def python_implementation():
            # Implementation name, with the PyPy version appended when running on PyPy
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        write_debug('Python version %s (%s %s) - %s' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        # Keep only the names of the features whose value is truthy
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        # Executables with a falsy version are left out of the listing
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .downloader.websocket import has_websockets
        from .postprocessor.embedthumbnail import has_mutagen
        from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE

        # Each entry is either a library name or a falsy value that join_nonempty drops
        lib_str = join_nonempty(
            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
            SECRETSTORAGE_AVAILABLE and 'secretstorage',
            has_mutagen and 'mutagen',
            SQLITE_AVAILABLE and 'sqlite',
            has_websockets and 'websockets',
            delim=', ') or 'none'
        write_debug('Optional libraries: %s' % lib_str)

        # Merge the proxies of every opener handler that has a 'proxies' attribute
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        # Not implemented
        # (the constant False in the condition means this block never runs)
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3634
3635     def _setup_opener(self):
3636         timeout_val = self.params.get('socket_timeout')
3637         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3638
3639         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3640         opts_cookiefile = self.params.get('cookiefile')
3641         opts_proxy = self.params.get('proxy')
3642
3643         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3644
3645         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3646         if opts_proxy is not None:
3647             if opts_proxy == '':
3648                 proxies = {}
3649             else:
3650                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3651         else:
3652             proxies = compat_urllib_request.getproxies()
3653             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3654             if 'http' in proxies and 'https' not in proxies:
3655                 proxies['https'] = proxies['http']
3656         proxy_handler = PerRequestProxyHandler(proxies)
3657
3658         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3659         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3660         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3661         redirect_handler = YoutubeDLRedirectHandler()
3662         data_handler = compat_urllib_request_DataHandler()
3663
3664         # When passing our own FileHandler instance, build_opener won't add the
3665         # default FileHandler and allows us to disable the file protocol, which
3666         # can be used for malicious purposes (see
3667         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3668         file_handler = compat_urllib_request.FileHandler()
3669
3670         def file_open(*args, **kwargs):
3671             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3672         file_handler.file_open = file_open
3673
3674         opener = compat_urllib_request.build_opener(
3675             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3676
3677         # Delete the default user-agent header, which would otherwise apply in
3678         # cases where our custom HTTP handler doesn't come into play
3679         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3680         opener.addheaders = []
3681         self._opener = opener
3682
3683     def encode(self, s):
3684         if isinstance(s, bytes):
3685             return s  # Already encoded
3686
3687         try:
3688             return s.encode(self.get_encoding())
3689         except UnicodeEncodeError as err:
3690             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3691             raise
3692
3693     def get_encoding(self):
3694         encoding = self.params.get('encoding')
3695         if encoding is None:
3696             encoding = preferredencoding()
3697         return encoding
3698
3699     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3700         ''' Write infojson and returns True = written, False = skip, None = error '''
3701         if overwrite is None:
3702             overwrite = self.params.get('overwrites', True)
3703         if not self.params.get('writeinfojson'):
3704             return False
3705         elif not infofn:
3706             self.write_debug(f'Skipping writing {label} infojson')
3707             return False
3708         elif not self._ensure_dir_exists(infofn):
3709             return None
3710         elif not overwrite and os.path.exists(infofn):
3711             self.to_screen(f'[info] {label.title()} metadata is already present')
3712         else:
3713             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3714             try:
3715                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3716             except (OSError, IOError):
3717                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3718                 return None
3719         return True
3720
3721     def _write_description(self, label, ie_result, descfn):
3722         ''' Write description and returns True = written, False = skip, None = error '''
3723         if not self.params.get('writedescription'):
3724             return False
3725         elif not descfn:
3726             self.write_debug(f'Skipping writing {label} description')
3727             return False
3728         elif not self._ensure_dir_exists(descfn):
3729             return None
3730         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3731             self.to_screen(f'[info] {label.title()} description is already present')
3732         elif ie_result.get('description') is None:
3733             self.report_warning(f'There\'s no {label} description to write')
3734             return False
3735         else:
3736             try:
3737                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3738                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3739                     descfile.write(ie_result['description'])
3740             except (OSError, IOError):
3741                 self.report_error(f'Cannot write {label} description file {descfn}')
3742                 return None
3743         return True
3744
3745     def _write_subtitles(self, info_dict, filename):
3746         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3747         ret = []
3748         subtitles = info_dict.get('requested_subtitles')
3749         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3750             # subtitles download errors are already managed as troubles in relevant IE
3751             # that way it will silently go on when used with unsupporting IE
3752             return ret
3753
3754         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3755         if not sub_filename_base:
3756             self.to_screen('[info] Skipping writing video subtitles')
3757             return ret
3758         for sub_lang, sub_info in subtitles.items():
3759             sub_format = sub_info['ext']
3760             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3761             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3762             existing_sub = self.existing_file((sub_filename_final, sub_filename))
3763             if existing_sub:
3764                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3765                 sub_info['filepath'] = existing_sub
3766                 ret.append((existing_sub, sub_filename_final))
3767                 continue
3768
3769             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3770             if sub_info.get('data') is not None:
3771                 try:
3772                     # Use newline='' to prevent conversion of newline characters
3773                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3774                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3775                         subfile.write(sub_info['data'])
3776                     sub_info['filepath'] = sub_filename
3777                     ret.append((sub_filename, sub_filename_final))
3778                     continue
3779                 except (OSError, IOError):
3780                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3781                     return None
3782
3783             try:
3784                 sub_copy = sub_info.copy()
3785                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3786                 self.dl(sub_filename, sub_copy, subtitle=True)
3787                 sub_info['filepath'] = sub_filename
3788                 ret.append((sub_filename, sub_filename_final))
3789             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3790                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3791                     raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
3792                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3793         return ret
3794
3795     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3796         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3797         write_all = self.params.get('write_all_thumbnails', False)
3798         thumbnails, ret = [], []
3799         if write_all or self.params.get('writethumbnail', False):
3800             thumbnails = info_dict.get('thumbnails') or []
3801         multiple = write_all and len(thumbnails) > 1
3802
3803         if thumb_filename_base is None:
3804             thumb_filename_base = filename
3805         if thumbnails and not thumb_filename_base:
3806             self.write_debug(f'Skipping writing {label} thumbnail')
3807             return ret
3808
3809         for idx, t in list(enumerate(thumbnails))[::-1]:
3810             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3811             thumb_display_id = f'{label} thumbnail {t["id"]}'
3812             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3813             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3814
3815             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3816             if existing_thumb:
3817                 self.to_screen('[info] %s is already present' % (
3818                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3819                 t['filepath'] = existing_thumb
3820                 ret.append((existing_thumb, thumb_filename_final))
3821             else:
3822                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3823                 try:
3824                     uf = self.urlopen(t['url'])
3825                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3826                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3827                         shutil.copyfileobj(uf, thumbf)
3828                     ret.append((thumb_filename, thumb_filename_final))
3829                     t['filepath'] = thumb_filename
3830                 except network_exceptions as err:
3831                     thumbnails.pop(idx)
3832                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3833             if ret and not write_all:
3834                 break
3835         return ret