yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_pycrypto_AES,
  40     compat_shlex_quote,
  41     compat_str,
  42     compat_tokenize_tokenize,
  43     compat_urllib_error,
  44     compat_urllib_request,
  45     compat_urllib_request_DataHandler,
  46     windows_enable_vt_mode,
  47 )
  48 from .cookies import load_cookies
  49 from .utils import (
  50     age_restricted,
  51     args_to_str,
  52     ContentTooShortError,
  53     date_from_str,
  54     DateRange,
  55     DEFAULT_OUTTMPL,
  56     determine_ext,
  57     determine_protocol,
  58     DownloadCancelled,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     format_decimal_suffix,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     get_domain,
  74     HEADRequest,
  75     InAdvancePagedList,
  76     int_or_none,
  77     iri_to_uri,
  78     ISO3166Utils,
  79     join_nonempty,
  80     LazyList,
  81     LINK_TEMPLATES,
  82     locked_file,
  83     make_dir,
  84     make_HTTPS_handler,
  85     MaxDownloadsReached,
  86     network_exceptions,
  87     number_of_digits,
  88     orderedSet,
  89     OUTTMPL_TYPES,
  90     PagedList,
  91     parse_filesize,
  92     PerRequestProxyHandler,
  93     platform_name,
  94     Popen,
  95     POSTPROCESS_WHEN,
  96     PostProcessingError,
  97     preferredencoding,
  98     prepend_extension,
  99     ReExtractInfo,
 100     register_socks_protocols,
 101     RejectedVideoReached,
 102     remove_terminal_sequences,
 103     render_table,
 104     replace_extension,
 105     SameFileError,
 106     sanitize_filename,
 107     sanitize_path,
 108     sanitize_url,
 109     sanitized_Request,
 110     std_headers,
 111     STR_FORMAT_RE_TMPL,
 112     STR_FORMAT_TYPES,
 113     str_or_none,
 114     strftime_or_none,
 115     subtitles_filename,
 116     supports_terminal_sequences,
 117     timetuple_from_msec,
 118     to_high_limit_path,
 119     traverse_obj,
 120     try_get,
 121     UnavailableVideoError,
 122     url_basename,
 123     variadic,
 124     version_tuple,
 125     write_json_file,
 126     write_string,
 127     YoutubeDLCookieProcessor,
 128     YoutubeDLHandler,
 129     YoutubeDLRedirectHandler,
 130 )
 131 from .cache import Cache
 132 from .minicurses import format_text
 133 from .extractor import (
 134     gen_extractor_classes,
 135     get_info_extractor,
 136     _LAZY_LOADER,
 137     _PLUGIN_CLASSES as plugin_extractors
 138 )
 139 from .extractor.openload import PhantomJSwrapper
 140 from .downloader import (
 141     FFmpegFD,
 142     get_suitable_downloader,
 143     shorten_protocol_name
 144 )
 145 from .downloader.rtmp import rtmpdump_version
 146 from .postprocessor import (
 147     get_postprocessor,
 148     EmbedThumbnailPP,
 149     FFmpegFixupDuplicateMoovPP,
 150     FFmpegFixupDurationPP,
 151     FFmpegFixupM3u8PP,
 152     FFmpegFixupM4aPP,
 153     FFmpegFixupStretchedPP,
 154     FFmpegFixupTimestampPP,
 155     FFmpegMergerPP,
 156     FFmpegPostProcessor,
 157     MoveFilesAfterDownloadPP,
 158     _PLUGIN_CLASSES as plugin_postprocessors
 159 )
 160 from .update import detect_variant
 161 from .version import __version__, RELEASE_GIT_HEAD
 162
 163 if compat_os_name == 'nt':
 164     import ctypes
 165
 166
 167 class YoutubeDL(object):
 168     """YoutubeDL class.
 169
 170     YoutubeDL objects are the ones responsible for downloading the
 171     actual video file and writing it to disk if the user has requested
 172     it, among some other tasks. In most cases there should be one per
 173     program. As, given a video URL, the downloader doesn't know how to
 174     extract all the needed information, task that InfoExtractors do, it
 175     has to pass the URL to one of them.
 176
 177     For this, YoutubeDL objects have a method that allows
 178     InfoExtractors to be registered in a given order. When it is passed
 179     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 180     finds that reports being able to handle it. The InfoExtractor extracts
 181     all the information about the video or videos the URL refers to, and
 182     YoutubeDL processes the extracted information, possibly using a File
 183     Downloader to download the video.
 184
 185     YoutubeDL objects accept a lot of parameters. In order not to saturate
 186     the object constructor with arguments, it receives a dictionary of
 187     options instead. These options are available through the params
 188     attribute for the InfoExtractors to use. The YoutubeDL also
 189     registers itself as the downloader in charge for the InfoExtractors
 190     that are added to it, so this is a "mutual registration".
 191
 192     Available options:
 193
 194     username:          Username for authentication purposes.
 195     password:          Password for authentication purposes.
 196     videopassword:     Password for accessing a video.
 197     ap_mso:            Adobe Pass multiple-system operator identifier.
 198     ap_username:       Multiple-system operator account username.
 199     ap_password:       Multiple-system operator account password.
 200     usenetrc:          Use netrc for authentication instead.
 201     verbose:           Print additional info to stdout.
 202     quiet:             Do not print messages to stdout.
 203     no_warnings:       Do not print out anything for warnings.
 204     forceprint:        A dict with keys WHEN mapped to a list of templates to
 205                        print to stdout. The allowed keys are video or any of the
 206                        items in utils.POSTPROCESS_WHEN.
 207                        For compatibility, a single list is also accepted
 208     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 209                        a list of tuples with (template, filename)
 210     forceurl:          Force printing final URL. (Deprecated)
 211     forcetitle:        Force printing title. (Deprecated)
 212     forceid:           Force printing ID. (Deprecated)
 213     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 214     forcedescription:  Force printing description. (Deprecated)
 215     forcefilename:     Force printing final filename. (Deprecated)
 216     forceduration:     Force printing duration. (Deprecated)
 217     forcejson:         Force printing info_dict as JSON.
 218     dump_single_json:  Force printing the info_dict of the whole playlist
 219                        (or video) as a single JSON line.
 220     force_write_download_archive: Force writing download archive regardless
 221                        of 'skip_download' or 'simulate'.
 222     simulate:          Do not download the video files. If unset (or None),
 223                        simulate only if listsubtitles, listformats or list_thumbnails is used
 224     format:            Video format code. See "FORMAT SELECTION" for more details.
 225                        You can also pass a function. The function takes 'ctx' as
 226                        argument and returns the formats to download.
 227                        See "build_format_selector" for an implementation
 228     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 229     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 230                        extracting metadata even if the video is not actually
 231                        available for download (experimental)
 232     format_sort:       A list of fields by which to sort the video formats.
 233                        See "Sorting Formats" for more details.
 234     format_sort_force: Force the given format_sort. See "Sorting Formats"
 235                        for more details.
 236     allow_multiple_video_streams:   Allow multiple video streams to be merged
 237                        into a single file
 238     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 239                        into a single file
 240     check_formats:     Whether to test if the formats are downloadable.
 241                        Can be True (check all), False (check none),
 242                        'selected' (check selected formats),
 243                        or None (check only if requested by extractor)
 244     paths:             Dictionary of output paths. The allowed keys are 'home',
 245                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 246     outtmpl:           Dictionary of templates for output names. Allowed keys
 247                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 248                        For compatibility with youtube-dl, a single string can also be used
 249     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 250     restrictfilenames: Do not allow "&" and spaces in file names
 251     trim_file_name:    Limit length of filename (extension excluded)
 252     windowsfilenames:  Force the filenames to be windows compatible
 253     ignoreerrors:      Do not stop on download/postprocessing errors.
 254                        Can be 'only_download' to ignore only download errors.
 255                        Default is 'only_download' for CLI, but False for API
 256     skip_playlist_after_errors: Number of allowed failures until the rest of
 257                        the playlist is skipped
 258     force_generic_extractor: Force downloader to use the generic extractor
 259     overwrites:        Overwrite all video and metadata files if True,
 260                        overwrite only non-video files if None
 261                        and don't overwrite any file if False
 262                        For compatibility with youtube-dl,
 263                        "nooverwrites" may also be used instead
 264     playliststart:     Playlist item to start at.
 265     playlistend:       Playlist item to end at.
 266     playlist_items:    Specific indices of playlist to download.
 267     playlistreverse:   Download playlist items in reverse order.
 268     playlistrandom:    Download playlist items in random order.
 269     matchtitle:        Download only matching titles.
 270     rejecttitle:       Reject downloads for matching titles.
 271     logger:            Log messages to a logging.Logger instance.
 272     logtostderr:       Log messages to stderr instead of stdout.
 273     consoletitle:      Display progress in console window's titlebar.
 274     writedescription:  Write the video description to a .description file
 275     writeinfojson:     Write the video metadata to a .info.json file
 276     clean_infojson:    Remove private fields from the infojson
 277     getcomments:       Extract video comments. This will not be written to disk
 278                        unless writeinfojson is also given
 279     writeannotations:  Write the video annotations to a .annotations.xml file
 280     writethumbnail:    Write the thumbnail image to a file
 281     allow_playlist_files: Whether to write playlists' description, infojson etc
 282                        also to disk when using the 'write*' options
 283     write_all_thumbnails:  Write all thumbnail formats to files
 284     writelink:         Write an internet shortcut file, depending on the
 285                        current platform (.url/.webloc/.desktop)
 286     writeurllink:      Write a Windows internet shortcut file (.url)
 287     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 288     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 289     writesubtitles:    Write the video subtitles to a file
 290     writeautomaticsub: Write the automatically generated subtitles to a file
 291     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 292                        Downloads all the subtitles of the video
 293                        (requires writesubtitles or writeautomaticsub)
 294     listsubtitles:     Lists all available subtitles for the video
 295     subtitlesformat:   The format code for subtitles
 296     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 297                        The list may contain "all" to refer to all the available
 298                        subtitles. The language can be prefixed with a "-" to
 299                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 300     keepvideo:         Keep the video file after post-processing
 301     daterange:         A DateRange object, download only if the upload_date is in the range.
 302     skip_download:     Skip the actual download of the video file
 303     cachedir:          Location of the cache files in the filesystem.
 304                        False to disable filesystem cache.
 305     noplaylist:        Download single video instead of a playlist if in doubt.
 306     age_limit:         An integer representing the user's age in years.
 307                        Unsuitable videos for the given age are skipped.
 308     min_views:         An integer representing the minimum view count the video
 309                        must have in order to not be skipped.
 310                        Videos without view count information are always
 311                        downloaded. None for no limit.
 312     max_views:         An integer representing the maximum view count.
 313                        Videos that are more popular than that are not
 314                        downloaded.
 315                        Videos without view count information are always
 316                        downloaded. None for no limit.
 317     download_archive:  File name of a file where all downloads are recorded.
 318                        Videos already present in the file are not downloaded
 319                        again.
 320     break_on_existing: Stop the download process after attempting to download a
 321                        file that is in the archive.
 322     break_on_reject:   Stop the download process when encountering a video that
 323                        has been filtered out.
 324     break_per_url:     Whether break_on_reject and break_on_existing
 325                        should act on each input URL as opposed to for the entire queue
 326     cookiefile:        File name where cookies should be read from and dumped to
 327     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 328                        name/path from where cookies are loaded, and the name of the
 329                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 330     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 331                        support RFC 5746 secure renegotiation
 332     nocheckcertificate:  Do not verify SSL certificates
 333     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 334                        At the moment, this is only supported by YouTube.
 335     proxy:             URL of the proxy server to use
 336     geo_verification_proxy:  URL of the proxy to use for IP address verification
 337                        on geo-restricted sites.
 338     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 339     bidi_workaround:   Work around buggy terminals without bidirectional text
 340                        support, using fribidi
 341     debug_printtraffic:Print out sent and received HTTP traffic
 342     include_ads:       Download ads as well (deprecated)
 343     default_search:    Prepend this string if an input url is not valid.
 344                        'auto' for elaborate guessing
 345     encoding:          Use this encoding instead of the system-specified.
 346     extract_flat:      Do not resolve URLs, return the immediate result.
 347                        Pass in 'in_playlist' to only show this behavior for
 348                        playlist items.
 349     wait_for_video:    If given, wait for scheduled streams to become available.
 350                        The value should be a tuple containing the range
 351                        (min_secs, max_secs) to wait between retries
 352     postprocessors:    A list of dictionaries, each with an entry
 353                        * key:  The name of the postprocessor. See
 354                                yt_dlp/postprocessor/__init__.py for a list.
 355                        * when: When to run the postprocessor. Allowed values are
 356                                the entries of utils.POSTPROCESS_WHEN
 357                                Assumed to be 'post_process' if not given
 358     post_hooks:        Deprecated - Register a custom postprocessor instead
 359                        A list of functions that get called as the final step
 360                        for each video file, after all postprocessors have been
 361                        called. The filename will be passed as the only argument.
 362     progress_hooks:    A list of functions that get called on download
 363                        progress, with a dictionary with the entries
 364                        * status: One of "downloading", "error", or "finished".
 365                                  Check this first and ignore unknown values.
 366                        * info_dict: The extracted info_dict
 367
 368                        If status is one of "downloading" or "finished", the
 369                        following properties may also be present:
 370                        * filename: The final filename (always present)
 371                        * tmpfilename: The filename we're currently writing to
 372                        * downloaded_bytes: Bytes on disk
 373                        * total_bytes: Size of the whole file, None if unknown
 374                        * total_bytes_estimate: Guess of the eventual file size,
 375                                                None if unavailable.
 376                        * elapsed: The number of seconds since download started.
 377                        * eta: The estimated time in seconds, None if unknown
 378                        * speed: The download speed in bytes/second, None if
 379                                 unknown
 380                        * fragment_index: The counter of the currently
 381                                          downloaded video fragment.
 382                        * fragment_count: The number of fragments (= individual
 383                                          files that will be merged)
 384
 385                        Progress hooks are guaranteed to be called at least once
 386                        (with status "finished") if the download is successful.
 387     postprocessor_hooks:  A list of functions that get called on postprocessing
 388                        progress, with a dictionary with the entries
 389                        * status: One of "started", "processing", or "finished".
 390                                  Check this first and ignore unknown values.
 391                        * postprocessor: Name of the postprocessor
 392                        * info_dict: The extracted info_dict
 393
 394                        Postprocessor hooks are guaranteed to be called at least twice
 395                        (with status "started" and "finished") if the processing is successful.
 396     merge_output_format: Extension to use when merging formats.
 397     final_ext:         Expected final extension; used to detect when the file was
 398                        already downloaded and converted
 399     fixup:             Automatically correct known faults of the file.
 400                        One of:
 401                        - "never": do nothing
 402                        - "warn": only emit a warning
 403                        - "detect_or_warn": check whether we can do anything
 404                                            about it, warn otherwise (default)
 405     source_address:    Client-side IP address to bind to.
 406     call_home:         Boolean, true iff we are allowed to contact the
 407                        yt-dlp servers for debugging. (BROKEN)
 408     sleep_interval_requests: Number of seconds to sleep between requests
 409                        during extraction
 410     sleep_interval:    Number of seconds to sleep before each download when
 411                        used alone or a lower bound of a range for randomized
 412                        sleep before each download (minimum possible number
 413                        of seconds to sleep) when used along with
 414                        max_sleep_interval.
 415     max_sleep_interval:Upper bound of a range for randomized sleep before each
 416                        download (maximum possible number of seconds to sleep).
 417                        Must only be used along with sleep_interval.
 418                        Actual sleep time will be a random float from range
 419                        [sleep_interval; max_sleep_interval].
 420     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 421     listformats:       Print an overview of available video formats and exit.
 422     list_thumbnails:   Print a table of all thumbnails and exit.
 423     match_filter:      A function that gets called with the info_dict of
 424                        every video.
 425                        If it returns a message, the video is ignored.
 426                        If it returns None, the video is downloaded.
 427                        match_filter_func in utils.py is one example for this.
 428     no_color:          Do not emit color codes in output.
 429     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 430                        HTTP header
 431     geo_bypass_country:
 432                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 433                        explicit geographic restriction bypassing via faking
 434                        X-Forwarded-For HTTP header
 435     geo_bypass_ip_block:
 436                        IP range in CIDR notation that will be used similarly to
 437                        geo_bypass_country
 438
 439     The following options determine which downloader is picked:
 440     external_downloader: A dictionary of protocol keys and the executable of the
 441                        external downloader to use for it. The allowed protocols
 442                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 443                        Set the value to 'native' to use the native downloader
 444     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 445                        or {'m3u8': 'ffmpeg'} instead.
 446                        Use the native HLS downloader instead of ffmpeg/avconv
 447                        if True, otherwise use ffmpeg/avconv if False, otherwise
 448                        use downloader suggested by extractor if None.
 449     compat_opts:       Compatibility options. See "Differences in default behavior".
 450                        The following options do not work when used through the API:
 451                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 452                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 453                        Refer to __init__.py for their implementation
 454     progress_template: Dictionary of templates for progress outputs.
 455                        Allowed keys are 'download', 'postprocess',
 456                        'download-title' (console title) and 'postprocess-title'.
 457                        The template is mapped on a dictionary with keys 'progress' and 'info'
 458
 459     The following parameters are not used by YoutubeDL itself, they are used by
 460     the downloader (see yt_dlp/downloader/common.py):
 461     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 462     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 463     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 464     external_downloader_args, concurrent_fragment_downloads.
 465
 466     The following options are used by the post processors:
 467     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 468                        otherwise prefer ffmpeg. (avconv support is deprecated)
 469     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 470                        to the binary or its containing directory.
 471     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 472                        and a list of additional command-line arguments for the
 473                        postprocessor/executable. The dict can also have "PP+EXE" keys
 474                        which are used when the given exe is used by the given PP.
 475                        Use 'default' as the name for arguments to be passed to all PP
 476                        For compatibility with youtube-dl, a single list of args
 477                        can also be used
 478
 479     The following options are used by the extractors:
 480     extractor_retries: Number of times to retry for known errors
 481     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 482     hls_split_discontinuity: Split HLS playlists to different formats at
 483                        discontinuities such as ad breaks (default: False)
 484     extractor_args:    A dictionary of arguments to be passed to the extractors.
 485                        See "EXTRACTOR ARGUMENTS" for details.
 486                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 487     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 488     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 489                        If True (default), DASH manifests and related
 490                        data will be downloaded and processed by extractor.
 491                        You can reduce network I/O by disabling it if you don't
 492                        care about DASH. (only for youtube)
 493     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 494                        If True (default), HLS manifests and related
 495                        data will be downloaded and processed by extractor.
 496                        You can reduce network I/O by disabling it if you don't
 497                        care about HLS. (only for youtube)
 498     """
 499
 500     _NUMERIC_FIELDS = set((
 501         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 502         'timestamp', 'release_timestamp',
 503         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 504         'average_rating', 'comment_count', 'age_limit',
 505         'start_time', 'end_time',
 506         'chapter_number', 'season_number', 'episode_number',
 507         'track_number', 'disc_number', 'release_year',
 508     ))
 509
 510     _format_selection_exts = {
 511         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 512         'video': {'mp4', 'flv', 'webm', '3gp'},
 513         'storyboards': {'mhtml'},
 514     }
 515
 516     params = None
 517     _ies = {}
 518     _pps = {k: [] for k in POSTPROCESS_WHEN}
 519     _printed_messages = set()
 520     _first_webpage_request = True
 521     _download_retcode = None
 522     _num_downloads = None
 523     _playlist_level = 0
 524     _playlist_urls = set()
 525     _screen_file = None
 526
 527     def __init__(self, params=None, auto_init=True):
 528         """Create a YoutubeDL object with the given options.
 529         @param auto_init    Whether to load the default extractors and print header (if verbose).
 530                             Set to 'no_verbose_header' to not print the header
 531         """
 532         if params is None:
 533             params = {}
 534         self._ies = {}
 535         self._ies_instances = {}
 536         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 537         self._printed_messages = set()
 538         self._first_webpage_request = True
 539         self._post_hooks = []
 540         self._progress_hooks = []
 541         self._postprocessor_hooks = []
 542         self._download_retcode = 0
 543         self._num_downloads = 0
 544         self._num_videos = 0
 545         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 546         self._err_file = sys.stderr
 547         self.params = params
 548         self.cache = Cache(self)
 549
 550         windows_enable_vt_mode()
 551         self._allow_colors = {
 552             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 553             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 554         }
 555
 556         if sys.version_info < (3, 6):
 557             self.report_warning(
 558                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 559
 560         if self.params.get('allow_unplayable_formats'):
 561             self.report_warning(
 562                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 563                 'This is a developer option intended for debugging. \n'
 564                 '         If you experience any issues while using this option, '
 565                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 566
 567         def check_deprecated(param, option, suggestion):
 568             if self.params.get(param) is not None:
 569                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 570                 return True
 571             return False
 572
 573         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 574             if self.params.get('geo_verification_proxy') is None:
 575                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 576
 577         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 578         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 579         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 580
 581         for msg in self.params.get('_warnings', []):
 582             self.report_warning(msg)
 583         for msg in self.params.get('_deprecation_warnings', []):
 584             self.deprecation_warning(msg)
 585
 586         if 'list-formats' in self.params.get('compat_opts', []):
 587             self.params['listformats_table'] = False
 588
 589         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 590             # nooverwrites was unnecessarily changed to overwrites
 591             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 592             # This ensures compatibility with both keys
 593             self.params['overwrites'] = not self.params['nooverwrites']
 594         elif self.params.get('overwrites') is None:
 595             self.params.pop('overwrites', None)
 596         else:
 597             self.params['nooverwrites'] = not self.params['overwrites']
 598
 599         self.params.setdefault('forceprint', {})
 600         self.params.setdefault('print_to_file', {})
 601
 602         # Compatibility with older syntax
 603         if not isinstance(params['forceprint'], dict):
 604             self.params['forceprint'] = {'video': params['forceprint']}
 605
 606         if self.params.get('bidi_workaround', False):
 607             try:
 608                 import pty
 609                 master, slave = pty.openpty()
 610                 width = compat_get_terminal_size().columns
 611                 if width is None:
 612                     width_args = []
 613                 else:
 614                     width_args = ['-w', str(width)]
 615                 sp_kwargs = dict(
 616                     stdin=subprocess.PIPE,
 617                     stdout=slave,
 618                     stderr=self._err_file)
 619                 try:
 620                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 621                 except OSError:
 622                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 623                 self._output_channel = os.fdopen(master, 'rb')
 624             except OSError as ose:
 625                 if ose.errno == errno.ENOENT:
 626                     self.report_warning(
 627                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 628                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 629                 else:
 630                     raise
 631
 632         if (sys.platform != 'win32'
 633                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 634                 and not self.params.get('restrictfilenames', False)):
 635             # Unicode filesystem API will throw errors (#1474, #13027)
 636             self.report_warning(
 637                 'Assuming --restrict-filenames since file system encoding '
 638                 'cannot encode all characters. '
 639                 'Set the LC_ALL environment variable to fix this.')
 640             self.params['restrictfilenames'] = True
 641
 642         self.outtmpl_dict = self.parse_outtmpl()
 643
 644         # Creating format selector here allows us to catch syntax errors before the extraction
 645         self.format_selector = (
 646             self.params.get('format') if self.params.get('format') in (None, '-')
 647             else self.params['format'] if callable(self.params['format'])
 648             else self.build_format_selector(self.params['format']))
 649
 650         self._setup_opener()
 651
 652         if auto_init:
 653             if auto_init != 'no_verbose_header':
 654                 self.print_debug_header()
 655             self.add_default_info_extractors()
 656
 657         hooks = {
 658             'post_hooks': self.add_post_hook,
 659             'progress_hooks': self.add_progress_hook,
 660             'postprocessor_hooks': self.add_postprocessor_hook,
 661         }
 662         for opt, fn in hooks.items():
 663             for ph in self.params.get(opt, []):
 664                 fn(ph)
 665
 666         for pp_def_raw in self.params.get('postprocessors', []):
 667             pp_def = dict(pp_def_raw)
 668             when = pp_def.pop('when', 'post_process')
 669             self.add_post_processor(
 670                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 671                 when=when)
 672
 673         register_socks_protocols()
 674
 675         def preload_download_archive(fn):
 676             """Preload the archive, if any is specified"""
 677             if fn is None:
 678                 return False
 679             self.write_debug(f'Loading archive file {fn!r}')
 680             try:
 681                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 682                     for line in archive_file:
 683                         self.archive.add(line.strip())
 684             except IOError as ioe:
 685                 if ioe.errno != errno.ENOENT:
 686                     raise
 687                 return False
 688             return True
 689
 690         self.archive = set()
 691         preload_download_archive(self.params.get('download_archive'))
 692
 693     def warn_if_short_id(self, argv):
 694         # short YouTube ID starting with dash?
 695         idxs = [
 696             i for i, a in enumerate(argv)
 697             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 698         if idxs:
 699             correct_argv = (
 700                 ['yt-dlp']
 701                 + [a for i, a in enumerate(argv) if i not in idxs]
 702                 + ['--'] + [argv[i] for i in idxs]
 703             )
 704             self.report_warning(
 705                 'Long argument string detected. '
 706                 'Use -- to separate parameters and URLs, like this:\n%s' %
 707                 args_to_str(correct_argv))
 708
 709     def add_info_extractor(self, ie):
 710         """Add an InfoExtractor object to the end of the list."""
 711         ie_key = ie.ie_key()
 712         self._ies[ie_key] = ie
 713         if not isinstance(ie, type):
 714             self._ies_instances[ie_key] = ie
 715             ie.set_downloader(self)
 716
 717     def _get_info_extractor_class(self, ie_key):
 718         ie = self._ies.get(ie_key)
 719         if ie is None:
 720             ie = get_info_extractor(ie_key)
 721             self.add_info_extractor(ie)
 722         return ie
 723
 724     def get_info_extractor(self, ie_key):
 725         """
 726         Get an instance of an IE with name ie_key, it will try to get one from
 727         the _ies list, if there's no instance it will create a new one and add
 728         it to the extractor list.
 729         """
 730         ie = self._ies_instances.get(ie_key)
 731         if ie is None:
 732             ie = get_info_extractor(ie_key)()
 733             self.add_info_extractor(ie)
 734         return ie
 735
 736     def add_default_info_extractors(self):
 737         """
 738         Add the InfoExtractors returned by gen_extractors to the end of the list
 739         """
 740         for ie in gen_extractor_classes():
 741             self.add_info_extractor(ie)
 742
 743     def add_post_processor(self, pp, when='post_process'):
 744         """Add a PostProcessor object to the end of the chain."""
 745         self._pps[when].append(pp)
 746         pp.set_downloader(self)
 747
 748     def add_post_hook(self, ph):
 749         """Add the post hook"""
 750         self._post_hooks.append(ph)
 751
 752     def add_progress_hook(self, ph):
 753         """Add the download progress hook"""
 754         self._progress_hooks.append(ph)
 755
 756     def add_postprocessor_hook(self, ph):
 757         """Add the postprocessing progress hook"""
 758         self._postprocessor_hooks.append(ph)
 759         for pps in self._pps.values():
 760             for pp in pps:
 761                 pp.add_progress_hook(ph)
 762
 763     def _bidi_workaround(self, message):
 764         if not hasattr(self, '_output_channel'):
 765             return message
 766
 767         assert hasattr(self, '_output_process')
 768         assert isinstance(message, compat_str)
 769         line_count = message.count('\n') + 1
 770         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 771         self._output_process.stdin.flush()
 772         res = ''.join(self._output_channel.readline().decode('utf-8')
 773                       for _ in range(line_count))
 774         return res[:-len('\n')]
 775
 776     def _write_string(self, message, out=None, only_once=False):
 777         if only_once:
 778             if message in self._printed_messages:
 779                 return
 780             self._printed_messages.add(message)
 781         write_string(message, out=out, encoding=self.params.get('encoding'))
 782
 783     def to_stdout(self, message, skip_eol=False, quiet=False):
 784         """Print message to stdout"""
 785         if self.params.get('logger'):
 786             self.params['logger'].debug(message)
 787         elif not quiet or self.params.get('verbose'):
 788             self._write_string(
 789                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 790                 self._err_file if quiet else self._screen_file)
 791
 792     def to_stderr(self, message, only_once=False):
 793         """Print message to stderr"""
 794         assert isinstance(message, compat_str)
 795         if self.params.get('logger'):
 796             self.params['logger'].error(message)
 797         else:
 798             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 799
 800     def to_console_title(self, message):
 801         if not self.params.get('consoletitle', False):
 802             return
 803         message = remove_terminal_sequences(message)
 804         if compat_os_name == 'nt':
 805             if ctypes.windll.kernel32.GetConsoleWindow():
 806                 # c_wchar_p() might not be necessary if `message` is
 807                 # already of type unicode()
 808                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 809         elif 'TERM' in os.environ:
 810             self._write_string('\033]0;%s\007' % message, self._screen_file)
 811
 812     def save_console_title(self):
 813         if not self.params.get('consoletitle', False):
 814             return
 815         if self.params.get('simulate'):
 816             return
 817         if compat_os_name != 'nt' and 'TERM' in os.environ:
 818             # Save the title on stack
 819             self._write_string('\033[22;0t', self._screen_file)
 820
 821     def restore_console_title(self):
 822         if not self.params.get('consoletitle', False):
 823             return
 824         if self.params.get('simulate'):
 825             return
 826         if compat_os_name != 'nt' and 'TERM' in os.environ:
 827             # Restore the title from stack
 828             self._write_string('\033[23;0t', self._screen_file)
 829
 830     def __enter__(self):
 831         self.save_console_title()
 832         return self
 833
 834     def __exit__(self, *args):
 835         self.restore_console_title()
 836
 837         if self.params.get('cookiefile') is not None:
 838             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 839
 840     def trouble(self, message=None, tb=None, is_error=True):
 841         """Determine action to take when a download problem appears.
 842
 843         Depending on if the downloader has been configured to ignore
 844         download errors or not, this method may throw an exception or
 845         not when errors are found, after printing the message.
 846
 847         @param tb          If given, is additional traceback information
 848         @param is_error    Whether to raise error according to ignorerrors
 849         """
 850         if message is not None:
 851             self.to_stderr(message)
 852         if self.params.get('verbose'):
 853             if tb is None:
 854                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 855                     tb = ''
 856                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 857                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 858                     tb += encode_compat_str(traceback.format_exc())
 859                 else:
 860                     tb_data = traceback.format_list(traceback.extract_stack())
 861                     tb = ''.join(tb_data)
 862             if tb:
 863                 self.to_stderr(tb)
 864         if not is_error:
 865             return
 866         if not self.params.get('ignoreerrors'):
 867             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 868                 exc_info = sys.exc_info()[1].exc_info
 869             else:
 870                 exc_info = sys.exc_info()
 871             raise DownloadError(message, exc_info)
 872         self._download_retcode = 1
 873
 874     def to_screen(self, message, skip_eol=False):
 875         """Print message to stdout if not in quiet mode"""
 876         self.to_stdout(
 877             message, skip_eol, quiet=self.params.get('quiet', False))
 878
    class Styles(Enum):
        """Named text styles for console output.

        Each member's value is the style string that _format_text passes on to
        format_text after unwrapping the member with `.value`.
        """
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        # NOTE: same value as HEADERS, so Enum makes WARNING an alias of HEADERS
        # (Styles.WARNING is Styles.HEADERS) rather than a distinct member
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 887
 888     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 889         if test_encoding:
 890             original_text = text
 891             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 892             text = text.encode(encoding, 'ignore').decode(encoding)
 893             if fallback is not None and text != original_text:
 894                 text = fallback
 895         if isinstance(f, self.Styles):
 896             f = f.value
 897         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 898
 899     def _format_screen(self, *args, **kwargs):
 900         return self._format_text(
 901             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 902
 903     def _format_err(self, *args, **kwargs):
 904         return self._format_text(
 905             self._err_file, self._allow_colors['err'], *args, **kwargs)
 906
 907     def report_warning(self, message, only_once=False):
 908         '''
 909         Print the message to stderr, it will be prefixed with 'WARNING:'
 910         If stderr is a tty file the 'WARNING:' will be colored
 911         '''
 912         if self.params.get('logger') is not None:
 913             self.params['logger'].warning(message)
 914         else:
 915             if self.params.get('no_warnings'):
 916                 return
 917             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 918
 919     def deprecation_warning(self, message):
 920         if self.params.get('logger') is not None:
 921             self.params['logger'].warning('DeprecationWarning: {message}')
 922         else:
 923             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 924
 925     def report_error(self, message, *args, **kwargs):
 926         '''
 927         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 928         in red if stderr is a tty file.
 929         '''
 930         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 931
 932     def write_debug(self, message, only_once=False):
 933         '''Log debug message or Print message to stderr'''
 934         if not self.params.get('verbose', False):
 935             return
 936         message = '[debug] %s' % message
 937         if self.params.get('logger'):
 938             self.params['logger'].debug(message)
 939         else:
 940             self.to_stderr(message, only_once)
 941
 942     def report_file_already_downloaded(self, file_name):
 943         """Report file has already been fully downloaded."""
 944         try:
 945             self.to_screen('[download] %s has already been downloaded' % file_name)
 946         except UnicodeEncodeError:
 947             self.to_screen('[download] The file has already been downloaded')
 948
 949     def report_file_delete(self, file_name):
 950         """Report that existing file will be deleted."""
 951         try:
 952             self.to_screen('Deleting existing file %s' % file_name)
 953         except UnicodeEncodeError:
 954             self.to_screen('Deleting existing file')
 955
 956     def raise_no_formats(self, info, forced=False):
 957         has_drm = info.get('__has_drm')
 958         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 959         expected = self.params.get('ignore_no_formats_error')
 960         if forced or not expected:
 961             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 962                                  expected=has_drm or expected)
 963         else:
 964             self.report_warning(msg)
 965
 966     def parse_outtmpl(self):
 967         outtmpl_dict = self.params.get('outtmpl', {})
 968         if not isinstance(outtmpl_dict, dict):
 969             outtmpl_dict = {'default': outtmpl_dict}
 970         # Remove spaces in the default template
 971         if self.params.get('restrictfilenames'):
 972             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 973         else:
 974             sanitize = lambda x: x
 975         outtmpl_dict.update({
 976             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 977             if outtmpl_dict.get(k) is None})
 978         for key, val in outtmpl_dict.items():
 979             if isinstance(val, bytes):
 980                 self.report_warning(
 981                     'Parameter outtmpl is bytes, but should be a unicode string. '
 982                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 983         return outtmpl_dict
 984
 985     def get_output_path(self, dir_type='', filename=None):
 986         paths = self.params.get('paths', {})
 987         assert isinstance(paths, dict)
 988         path = os.path.join(
 989             expand_path(paths.get('home', '').strip()),
 990             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 991             filename or '')
 992
 993         # Temporary fix for #4787
 994         # 'Treat' all problem characters by passing filename through preferredencoding
 995         # to workaround encoding issues with subprocess on python2 @ Windows
 996         if sys.version_info < (3, 0) and sys.platform == 'win32':
 997             path = encodeFilename(path, True).decode(preferredencoding())
 998         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 999
1000     @staticmethod
1001     def _outtmpl_expandpath(outtmpl):
1002         # expand_path translates '%%' into '%' and '$$' into '$'
1003         # correspondingly that is not what we want since we need to keep
1004         # '%%' intact for template dict substitution step. Working around
1005         # with boundary-alike separator hack.
1006         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1007         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
1008
1009         # outtmpl should be expand_path'ed before template dict substitution
1010         # because meta fields may contain env variables we don't want to
1011         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1012         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1013         return expand_path(outtmpl).replace(sep, '')
1014
1015     @staticmethod
1016     def escape_outtmpl(outtmpl):
1017         ''' Escape any remaining strings like %s, %abc% etc. '''
1018         return re.sub(
1019             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1020             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1021             outtmpl)
1022
1023     @classmethod
1024     def validate_outtmpl(cls, outtmpl):
1025         ''' @return None or Exception object '''
1026         outtmpl = re.sub(
1027             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1028             lambda mobj: f'{mobj.group(0)[:-1]}s',
1029             cls._outtmpl_expandpath(outtmpl))
1030         try:
1031             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1032             return None
1033         except ValueError as err:
1034             return err
1035
1036     @staticmethod
1037     def _copy_infodict(info_dict):
1038         info_dict = dict(info_dict)
1039         for key in ('__original_infodict', '__postprocessors'):
1040             info_dict.pop(key, None)
1041         return info_dict
1042
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

        Every template field matched by EXTERNAL_FORMAT_RE that has a key is evaluated
        against a copy of info_dict and rewritten to reference a generated key; the
        computed values are collected in a new dictionary under those keys.

        @param outtmpl     The output template string
        @param info_dict   The metadata dictionary. 'epoch' is set on the passed dictionary
                           itself if missing; all other changes are made on a copy
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        @return            Tuple of (rewritten template, dictionary of generated key -> value)
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        # From here on only a copy is modified
        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Generated key -> computed value; returned together with the rewritten template
        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the num pattern below is unescaped and so matches any
        # character; presumably a literal decimal point was intended - confirm
        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Grammar of the text between the parentheses of a template field
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Look up a dotted field path in info_dict; a leading empty component is dropped
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Compute the value for one parsed field (a groupdict of INTERNAL_FORMAT_RE)
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                # NOTE: this local name shadows the module-level `operator` import inside get_value
                operator = None
                # Consume the maths string alternately as operator, then operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is either a number or the name of another field
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder used when no value and no explicit default is available
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted,
                                     is_id=re.search(r'(^|[_.])id(\.|$)', key))

        # A callable `sanitize` is used as the sanitizer itself; afterwards
        # `sanitize` only records whether sanitization was requested
        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            # json.dumps fallback: sets and LazyLists become lists, anything else its repr
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            # Replacement callback for EXTERNAL_FORMAT_RE.sub: evaluates one template field,
            # stores the value in TMPL_DICT and returns the field rewritten to use the new key
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            # Follow the chain of comma-separated alternates until one yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            # Missing value -> default; otherwise the replacement (if given) wins over the value
            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            # Same format with the conversion type swapped for 's'
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                # First character of the value; falsy values are formatted as a string instead
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            # The generated key is the original key (each '%' followed by a NUL)
            # joined by a NUL to the original format
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1216
1217     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1218         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1219         return self.escape_outtmpl(outtmpl) % info_dict
1220
1221     def _prepare_filename(self, info_dict, tmpl_type='default'):
1222         try:
1223             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1224             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1225             if not filename:
1226                 return None
1227
1228             if tmpl_type in ('default', 'temp'):
1229                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1230                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1231                     filename = replace_extension(filename, ext, final_ext)
1232             else:
1233                 force_ext = OUTTMPL_TYPES[tmpl_type]
1234                 if force_ext:
1235                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1236
1237             # https://github.com/blackjack4494/youtube-dlc/issues/85
1238             trim_file_name = self.params.get('trim_file_name', False)
1239             if trim_file_name:
1240                 no_ext, *ext = filename.rsplit('.', 2)
1241                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1242
1243             return filename
1244         except ValueError as err:
1245             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1246             return None
1247
1248     def prepare_filename(self, info_dict, dir_type='', warn=False):
1249         """Generate the output filename."""
1250
1251         filename = self._prepare_filename(info_dict, dir_type or 'default')
1252         if not filename and dir_type not in ('', 'temp'):
1253             return ''
1254
1255         if warn:
1256             if not self.params.get('paths'):
1257                 pass
1258             elif filename == '-':
1259                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1260             elif os.path.isabs(filename):
1261                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1262         if filename == '-' or not filename:
1263             return filename
1264
1265         return self.get_output_path(dir_type, filename)
1266
1267     def _match_entry(self, info_dict, incomplete=False, silent=False):
1268         """ Returns None if the file should be downloaded """
1269
1270         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1271
1272         def check_filter():
1273             if 'title' in info_dict:
1274                 # This can happen when we're just evaluating the playlist
1275                 title = info_dict['title']
1276                 matchtitle = self.params.get('matchtitle', False)
1277                 if matchtitle:
1278                     if not re.search(matchtitle, title, re.IGNORECASE):
1279                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1280                 rejecttitle = self.params.get('rejecttitle', False)
1281                 if rejecttitle:
1282                     if re.search(rejecttitle, title, re.IGNORECASE):
1283                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1284             date = info_dict.get('upload_date')
1285             if date is not None:
1286                 dateRange = self.params.get('daterange', DateRange())
1287                 if date not in dateRange:
1288                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1289             view_count = info_dict.get('view_count')
1290             if view_count is not None:
1291                 min_views = self.params.get('min_views')
1292                 if min_views is not None and view_count < min_views:
1293                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1294                 max_views = self.params.get('max_views')
1295                 if max_views is not None and view_count > max_views:
1296                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1297             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1298                 return 'Skipping "%s" because it is age restricted' % video_title
1299
1300             match_filter = self.params.get('match_filter')
1301             if match_filter is not None:
1302                 try:
1303                     ret = match_filter(info_dict, incomplete=incomplete)
1304                 except TypeError:
1305                     # For backward compatibility
1306                     ret = None if incomplete else match_filter(info_dict)
1307                 if ret is not None:
1308                     return ret
1309             return None
1310
1311         if self.in_download_archive(info_dict):
1312             reason = '%s has already been recorded in the archive' % video_title
1313             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1314         else:
1315             reason = check_filter()
1316             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1317         if reason is not None:
1318             if not silent:
1319                 self.to_screen('[download] ' + reason)
1320             if self.params.get(break_opt, False):
1321                 raise break_err()
1322         return reason
1323
1324     @staticmethod
1325     def add_extra_info(info_dict, extra_info):
1326         '''Set the keys from extra_info in info dict if they are missing'''
1327         for key, value in extra_info.items():
1328             info_dict.setdefault(key, value)
1329
1330     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1331                      process=True, force_generic_extractor=False):
1332         """
1333         Return a list with a dictionary for each video extracted.
1334
1335         Arguments:
1336         url -- URL to extract
1337
1338         Keyword arguments:
1339         download -- whether to download videos during extraction
1340         ie_key -- extractor key hint
1341         extra_info -- dictionary containing the extra values to add to each result
1342         process -- whether to resolve all unresolved references (URLs, playlist items),
1343             must be True for download to work.
1344         force_generic_extractor -- force using the generic extractor
1345         """
1346
1347         if extra_info is None:
1348             extra_info = {}
1349
1350         if not ie_key and force_generic_extractor:
1351             ie_key = 'Generic'
1352
1353         if ie_key:
1354             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1355         else:
1356             ies = self._ies
1357
1358         for ie_key, ie in ies.items():
1359             if not ie.suitable(url):
1360                 continue
1361
1362             if not ie.working():
1363                 self.report_warning('The program functionality for this site has been marked as broken, '
1364                                     'and will probably not work.')
1365
1366             temp_id = ie.get_temp_id(url)
1367             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1368                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1369                 if self.params.get('break_on_existing', False):
1370                     raise ExistingVideoReached()
1371                 break
1372             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1373         else:
1374             self.report_error('no suitable InfoExtractor for URL %s' % url)
1375
1376     def __handle_extraction_exceptions(func):
1377         @functools.wraps(func)
1378         def wrapper(self, *args, **kwargs):
1379             while True:
1380                 try:
1381                     return func(self, *args, **kwargs)
1382                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1383                     raise
1384                 except ReExtractInfo as e:
1385                     if e.expected:
1386                         self.to_screen(f'{e}; Re-extracting data')
1387                     else:
1388                         self.to_stderr('\r')
1389                         self.report_warning(f'{e}; Re-extracting data')
1390                     continue
1391                 except GeoRestrictedError as e:
1392                     msg = e.msg
1393                     if e.countries:
1394                         msg += '\nThis video is available in %s.' % ', '.join(
1395                             map(ISO3166Utils.short2full, e.countries))
1396                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1397                     self.report_error(msg)
1398                 except ExtractorError as e:  # An error we somewhat expected
1399                     self.report_error(str(e), e.format_traceback())
1400                 except Exception as e:
1401                     if self.params.get('ignoreerrors'):
1402                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1403                     else:
1404                         raise
1405                 break
1406         return wrapper
1407
1408     def _wait_for_video(self, ie_result):
1409         if (not self.params.get('wait_for_video')
1410                 or ie_result.get('_type', 'video') != 'video'
1411                 or ie_result.get('formats') or ie_result.get('url')):
1412             return
1413
1414         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1415         last_msg = ''
1416
1417         def progress(msg):
1418             nonlocal last_msg
1419             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1420             last_msg = msg
1421
1422         min_wait, max_wait = self.params.get('wait_for_video')
1423         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1424         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1425             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1426             self.report_warning('Release time of video is not known')
1427         elif (diff or 0) <= 0:
1428             self.report_warning('Video should already be available according to extracted info')
1429         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1430         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1431
1432         wait_till = time.time() + diff
1433         try:
1434             while True:
1435                 diff = wait_till - time.time()
1436                 if diff <= 0:
1437                     progress('')
1438                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1439                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1440                 time.sleep(1)
1441         except KeyboardInterrupt:
1442             progress('')
1443             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1444         except BaseException as e:
1445             if not isinstance(e, ReExtractInfo):
1446                 self.to_screen('')
1447             raise
1448
1449     @__handle_extraction_exceptions
1450     def __extract_info(self, url, ie, download, extra_info, process):
1451         ie_result = ie.extract(url)
1452         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1453             return
1454         if isinstance(ie_result, list):
1455             # Backwards compatibility: old IE result format
1456             ie_result = {
1457                 '_type': 'compat_list',
1458                 'entries': ie_result,
1459             }
1460         if extra_info.get('original_url'):
1461             ie_result.setdefault('original_url', extra_info['original_url'])
1462         self.add_default_extra_info(ie_result, ie, url)
1463         if process:
1464             self._wait_for_video(ie_result)
1465             return self.process_ie_result(ie_result, download, extra_info)
1466         else:
1467             return ie_result
1468
1469     def add_default_extra_info(self, ie_result, ie, url):
1470         if url is not None:
1471             self.add_extra_info(ie_result, {
1472                 'webpage_url': url,
1473                 'original_url': url,
1474             })
1475         webpage_url = ie_result.get('webpage_url')
1476         if webpage_url:
1477             self.add_extra_info(ie_result, {
1478                 'webpage_url_basename': url_basename(webpage_url),
1479                 'webpage_url_domain': get_domain(webpage_url),
1480             })
1481         if ie is not None:
1482             self.add_extra_info(ie_result, {
1483                 'extractor': ie.IE_NAME,
1484                 'extractor_key': ie.ie_key(),
1485             })
1486
1487     def process_ie_result(self, ie_result, download=True, extra_info=None):
1488         """
1489         Take the result of the ie(may be modified) and resolve all unresolved
1490         references (URLs, playlist items).
1491
1492         It will also download the videos if 'download'.
1493         Returns the resolved ie_result.
1494         """
1495         if extra_info is None:
1496             extra_info = {}
1497         result_type = ie_result.get('_type', 'video')
1498
1499         if result_type in ('url', 'url_transparent'):
1500             ie_result['url'] = sanitize_url(ie_result['url'])
1501             if ie_result.get('original_url'):
1502                 extra_info.setdefault('original_url', ie_result['original_url'])
1503
1504             extract_flat = self.params.get('extract_flat', False)
1505             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1506                     or extract_flat is True):
1507                 info_copy = ie_result.copy()
1508                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1509                 if ie and not ie_result.get('id'):
1510                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1511                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1512                 self.add_extra_info(info_copy, extra_info)
1513                 info_copy, _ = self.pre_process(info_copy)
1514                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1515                 if self.params.get('force_write_download_archive', False):
1516                     self.record_download_archive(info_copy)
1517                 return ie_result
1518
1519         if result_type == 'video':
1520             self.add_extra_info(ie_result, extra_info)
1521             ie_result = self.process_video_result(ie_result, download=download)
1522             additional_urls = (ie_result or {}).get('additional_urls')
1523             if additional_urls:
1524                 # TODO: Improve MetadataParserPP to allow setting a list
1525                 if isinstance(additional_urls, compat_str):
1526                     additional_urls = [additional_urls]
1527                 self.to_screen(
1528                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1529                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1530                 ie_result['additional_entries'] = [
1531                     self.extract_info(
1532                         url, download, extra_info=extra_info,
1533                         force_generic_extractor=self.params.get('force_generic_extractor'))
1534                     for url in additional_urls
1535                 ]
1536             return ie_result
1537         elif result_type == 'url':
1538             # We have to add extra_info to the results because it may be
1539             # contained in a playlist
1540             return self.extract_info(
1541                 ie_result['url'], download,
1542                 ie_key=ie_result.get('ie_key'),
1543                 extra_info=extra_info)
1544         elif result_type == 'url_transparent':
1545             # Use the information from the embedding page
1546             info = self.extract_info(
1547                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1548                 extra_info=extra_info, download=False, process=False)
1549
1550             # extract_info may return None when ignoreerrors is enabled and
1551             # extraction failed with an error, don't crash and return early
1552             # in this case
1553             if not info:
1554                 return info
1555
1556             force_properties = dict(
1557                 (k, v) for k, v in ie_result.items() if v is not None)
1558             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1559                 if f in force_properties:
1560                     del force_properties[f]
1561             new_result = info.copy()
1562             new_result.update(force_properties)
1563
1564             # Extracted info may not be a video result (i.e.
1565             # info.get('_type', 'video') != video) but rather an url or
1566             # url_transparent. In such cases outer metadata (from ie_result)
1567             # should be propagated to inner one (info). For this to happen
1568             # _type of info should be overridden with url_transparent. This
1569             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1570             if new_result.get('_type') == 'url':
1571                 new_result['_type'] = 'url_transparent'
1572
1573             return self.process_ie_result(
1574                 new_result, download=download, extra_info=extra_info)
1575         elif result_type in ('playlist', 'multi_video'):
1576             # Protect from infinite recursion due to recursively nested playlists
1577             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1578             webpage_url = ie_result['webpage_url']
1579             if webpage_url in self._playlist_urls:
1580                 self.to_screen(
1581                     '[download] Skipping already downloaded playlist: %s'
1582                     % ie_result.get('title') or ie_result.get('id'))
1583                 return
1584
1585             self._playlist_level += 1
1586             self._playlist_urls.add(webpage_url)
1587             self._sanitize_thumbnails(ie_result)
1588             try:
1589                 return self.__process_playlist(ie_result, download)
1590             finally:
1591                 self._playlist_level -= 1
1592                 if not self._playlist_level:
1593                     self._playlist_urls.clear()
1594         elif result_type == 'compat_list':
1595             self.report_warning(
1596                 'Extractor %s returned a compat_list result. '
1597                 'It needs to be updated.' % ie_result.get('extractor'))
1598
1599             def _fixup(r):
1600                 self.add_extra_info(r, {
1601                     'extractor': ie_result['extractor'],
1602                     'webpage_url': ie_result['webpage_url'],
1603                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1604                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1605                     'extractor_key': ie_result['extractor_key'],
1606                 })
1607                 return r
1608             ie_result['entries'] = [
1609                 self.process_ie_result(_fixup(r), download, extra_info)
1610                 for r in ie_result['entries']
1611             ]
1612             return ie_result
1613         else:
1614             raise Exception('Invalid result type: %s' % result_type)
1615
1616     def _ensure_dir_exists(self, path):
1617         return make_dir(path, self.report_error)
1618
1619     @staticmethod
1620     def _playlist_infodict(ie_result, **kwargs):
1621         return {
1622             **ie_result,
1623             'playlist': ie_result.get('title') or ie_result.get('id'),
1624             'playlist_id': ie_result.get('id'),
1625             'playlist_title': ie_result.get('title'),
1626             'playlist_uploader': ie_result.get('uploader'),
1627             'playlist_uploader_id': ie_result.get('uploader_id'),
1628             'playlist_index': 0,
1629             **kwargs,
1630         }
1631
1632     def __process_playlist(self, ie_result, download):
1633         # We process each entry in the playlist
1634         playlist = ie_result.get('title') or ie_result.get('id')
1635         self.to_screen('[download] Downloading playlist: %s' % playlist)
1636
1637         if 'entries' not in ie_result:
1638             raise EntryNotInPlaylist('There are no entries')
1639
1640         MissingEntry = object()
1641         incomplete_entries = bool(ie_result.get('requested_entries'))
1642         if incomplete_entries:
1643             def fill_missing_entries(entries, indices):
1644                 ret = [MissingEntry] * max(indices)
1645                 for i, entry in zip(indices, entries):
1646                     ret[i - 1] = entry
1647                 return ret
1648             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1649
1650         playlist_results = []
1651
1652         playliststart = self.params.get('playliststart', 1)
1653         playlistend = self.params.get('playlistend')
1654         # For backwards compatibility, interpret -1 as whole list
1655         if playlistend == -1:
1656             playlistend = None
1657
1658         playlistitems_str = self.params.get('playlist_items')
1659         playlistitems = None
1660         if playlistitems_str is not None:
1661             def iter_playlistitems(format):
1662                 for string_segment in format.split(','):
1663                     if '-' in string_segment:
1664                         start, end = string_segment.split('-')
1665                         for item in range(int(start), int(end) + 1):
1666                             yield int(item)
1667                     else:
1668                         yield int(string_segment)
1669             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1670
1671         ie_entries = ie_result['entries']
1672         if isinstance(ie_entries, list):
1673             playlist_count = len(ie_entries)
1674             msg = f'Collected {playlist_count} videos; downloading %d of them'
1675             ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
1676
1677             def get_entry(i):
1678                 return ie_entries[i - 1]
1679         else:
1680             msg = 'Downloading %d videos'
1681             if not isinstance(ie_entries, (PagedList, LazyList)):
1682                 ie_entries = LazyList(ie_entries)
1683             elif isinstance(ie_entries, InAdvancePagedList):
1684                 if ie_entries._pagesize == 1:
1685                     playlist_count = ie_entries._pagecount
1686
1687             def get_entry(i):
1688                 return YoutubeDL.__handle_extraction_exceptions(
1689                     lambda self, i: ie_entries[i - 1]
1690                 )(self, i)
1691
1692         entries, broken = [], False
1693         items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1694         for i in items:
1695             if i == 0:
1696                 continue
1697             if playlistitems is None and playlistend is not None and playlistend < i:
1698                 break
1699             entry = None
1700             try:
1701                 entry = get_entry(i)
1702                 if entry is MissingEntry:
1703                     raise EntryNotInPlaylist()
1704             except (IndexError, EntryNotInPlaylist):
1705                 if incomplete_entries:
1706                     raise EntryNotInPlaylist(f'Entry {i} cannot be found')
1707                 elif not playlistitems:
1708                     break
1709             entries.append(entry)
1710             try:
1711                 if entry is not None:
1712                     self._match_entry(entry, incomplete=True, silent=True)
1713             except (ExistingVideoReached, RejectedVideoReached):
1714                 broken = True
1715                 break
1716         ie_result['entries'] = entries
1717
1718         # Save playlist_index before re-ordering
1719         entries = [
1720             ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1721             for i, entry in enumerate(entries, 1)
1722             if entry is not None]
1723         n_entries = len(entries)
1724
1725         if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
1726             ie_result['playlist_count'] = n_entries
1727
1728         if not playlistitems and (playliststart != 1 or playlistend):
1729             playlistitems = list(range(playliststart, playliststart + n_entries))
1730         ie_result['requested_entries'] = playlistitems
1731
1732         _infojson_written = False
1733         write_playlist_files = self.params.get('allow_playlist_files', True)
1734         if write_playlist_files and self.params.get('list_thumbnails'):
1735             self.list_thumbnails(ie_result)
1736         if write_playlist_files and not self.params.get('simulate'):
1737             ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
1738             _infojson_written = self._write_info_json(
1739                 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1740             if _infojson_written is None:
1741                 return
1742             if self._write_description('playlist', ie_result,
1743                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1744                 return
1745             # TODO: This should be passed to ThumbnailsConvertor if necessary
1746             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1747
1748         if self.params.get('playlistreverse', False):
1749             entries = entries[::-1]
1750         if self.params.get('playlistrandom', False):
1751             random.shuffle(entries)
1752
1753         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1754
1755         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1756         failures = 0
1757         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1758         for i, entry_tuple in enumerate(entries, 1):
1759             playlist_index, entry = entry_tuple
1760             if 'playlist-index' in self.params.get('compat_opts', []):
1761                 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1762             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1763             # This __x_forwarded_for_ip thing is a bit ugly but requires
1764             # minimal changes
1765             if x_forwarded_for:
1766                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1767             extra = {
1768                 'n_entries': n_entries,
1769                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1770                 'playlist_count': ie_result.get('playlist_count'),
1771                 'playlist_index': playlist_index,
1772                 'playlist_autonumber': i,
1773                 'playlist': playlist,
1774                 'playlist_id': ie_result.get('id'),
1775                 'playlist_title': ie_result.get('title'),
1776                 'playlist_uploader': ie_result.get('uploader'),
1777                 'playlist_uploader_id': ie_result.get('uploader_id'),
1778                 'extractor': ie_result['extractor'],
1779                 'webpage_url': ie_result['webpage_url'],
1780                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1781                 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1782                 'extractor_key': ie_result['extractor_key'],
1783             }
1784
1785             if self._match_entry(entry, incomplete=True) is not None:
1786                 continue
1787
1788             entry_result = self.__process_iterable_entry(entry, download, extra)
1789             if not entry_result:
1790                 failures += 1
1791             if failures >= max_failures:
1792                 self.report_error(
1793                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1794                 break
1795             playlist_results.append(entry_result)
1796         ie_result['entries'] = playlist_results
1797
1798         # Write the updated info to json
1799         if _infojson_written and self._write_info_json(
1800                 'updated playlist', ie_result,
1801                 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1802             return
1803
1804         ie_result = self.run_all_pps('playlist', ie_result)
1805         self.to_screen(f'[download] Finished downloading playlist: {playlist}')
1806         return ie_result
1807
1808     @__handle_extraction_exceptions
1809     def __process_iterable_entry(self, entry, download, extra_info):
1810         return self.process_ie_result(
1811             entry, download=download, extra_info=extra_info)
1812
1813     def _build_format_filter(self, filter_spec):
1814         " Returns a function to filter the formats according to the filter_spec "
1815
1816         OPERATORS = {
1817             '<': operator.lt,
1818             '<=': operator.le,
1819             '>': operator.gt,
1820             '>=': operator.ge,
1821             '=': operator.eq,
1822             '!=': operator.ne,
1823         }
1824         operator_rex = re.compile(r'''(?x)\s*
1825             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1826             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1827             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1828             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1829         m = operator_rex.fullmatch(filter_spec)
1830         if m:
1831             try:
1832                 comparison_value = int(m.group('value'))
1833             except ValueError:
1834                 comparison_value = parse_filesize(m.group('value'))
1835                 if comparison_value is None:
1836                     comparison_value = parse_filesize(m.group('value') + 'B')
1837                 if comparison_value is None:
1838                     raise ValueError(
1839                         'Invalid value %r in format specification %r' % (
1840                             m.group('value'), filter_spec))
1841             op = OPERATORS[m.group('op')]
1842
1843         if not m:
1844             STR_OPERATORS = {
1845                 '=': operator.eq,
1846                 '^=': lambda attr, value: attr.startswith(value),
1847                 '$=': lambda attr, value: attr.endswith(value),
1848                 '*=': lambda attr, value: value in attr,
1849                 '~=': lambda attr, value: value.search(attr) is not None
1850             }
1851             str_operator_rex = re.compile(r'''(?x)\s*
1852                 (?P<key>[a-zA-Z0-9._-]+)\s*
1853                 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1854                 (?P<quote>["'])?
1855                 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1856                 (?(quote)(?P=quote))\s*
1857                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1858             m = str_operator_rex.fullmatch(filter_spec)
1859             if m:
1860                 if m.group('op') == '~=':
1861                     comparison_value = re.compile(m.group('value'))
1862                 else:
1863                     comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1864                 str_op = STR_OPERATORS[m.group('op')]
1865                 if m.group('negation'):
1866                     op = lambda attr, value: not str_op(attr, value)
1867                 else:
1868                     op = str_op
1869
1870         if not m:
1871             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1872
1873         def _filter(f):
1874             actual_value = f.get(m.group('key'))
1875             if actual_value is None:
1876                 return m.group('none_inclusive')
1877             return op(actual_value, comparison_value)
1878         return _filter
1879
1880     def _check_formats(self, formats):
1881         for f in formats:
1882             self.to_screen('[info] Testing format %s' % f['format_id'])
1883             path = self.get_output_path('temp')
1884             if not self._ensure_dir_exists(f'{path}/'):
1885                 continue
1886             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1887             temp_file.close()
1888             try:
1889                 success, _ = self.dl(temp_file.name, f, test=True)
1890             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1891                 success = False
1892             finally:
1893                 if os.path.exists(temp_file.name):
1894                     try:
1895                         os.remove(temp_file.name)
1896                     except OSError:
1897                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1898             if success:
1899                 yield f
1900             else:
1901                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1902
1903     def _default_format_spec(self, info_dict, download=True):
1904
1905         def can_merge():
1906             merger = FFmpegMergerPP(self)
1907             return merger.available and merger.can_merge()
1908
1909         prefer_best = (
1910             not self.params.get('simulate')
1911             and download
1912             and (
1913                 not can_merge()
1914                 or info_dict.get('is_live', False)
1915                 or self.outtmpl_dict['default'] == '-'))
1916         compat = (
1917             prefer_best
1918             or self.params.get('allow_multiple_audio_streams', False)
1919             or 'format-spec' in self.params.get('compat_opts', []))
1920
1921         return (
1922             'best/bestvideo+bestaudio' if prefer_best
1923             else 'bestvideo*+bestaudio/best' if not compat
1924             else 'bestvideo+bestaudio/best')
1925
1926     def build_format_selector(self, format_spec):
1927         def syntax_error(note, start):
1928             message = (
1929                 'Invalid format specification: '
1930                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1931             return SyntaxError(message)
1932
1933         PICKFIRST = 'PICKFIRST'
1934         MERGE = 'MERGE'
1935         SINGLE = 'SINGLE'
1936         GROUP = 'GROUP'
1937         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1938
1939         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1940                                   'video': self.params.get('allow_multiple_video_streams', False)}
1941
1942         check_formats = self.params.get('check_formats') == 'selected'
1943
1944         def _parse_filter(tokens):
1945             filter_parts = []
1946             for type, string, start, _, _ in tokens:
1947                 if type == tokenize.OP and string == ']':
1948                     return ''.join(filter_parts)
1949                 else:
1950                     filter_parts.append(string)
1951
1952         def _remove_unused_ops(tokens):
1953             # Remove operators that we don't use and join them with the surrounding strings
1954             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1955             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1956             last_string, last_start, last_end, last_line = None, None, None, None
1957             for type, string, start, end, line in tokens:
1958                 if type == tokenize.OP and string == '[':
1959                     if last_string:
1960                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1961                         last_string = None
1962                     yield type, string, start, end, line
1963                     # everything inside brackets will be handled by _parse_filter
1964                     for type, string, start, end, line in tokens:
1965                         yield type, string, start, end, line
1966                         if type == tokenize.OP and string == ']':
1967                             break
1968                 elif type == tokenize.OP and string in ALLOWED_OPS:
1969                     if last_string:
1970                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1971                         last_string = None
1972                     yield type, string, start, end, line
1973                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1974                     if not last_string:
1975                         last_string = string
1976                         last_start = start
1977                         last_end = end
1978                     else:
1979                         last_string += string
1980             if last_string:
1981                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1982
1983         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1984             selectors = []
1985             current_selector = None
1986             for type, string, start, _, _ in tokens:
1987                 # ENCODING is only defined in python 3.x
1988                 if type == getattr(tokenize, 'ENCODING', None):
1989                     continue
1990                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1991                     current_selector = FormatSelector(SINGLE, string, [])
1992                 elif type == tokenize.OP:
1993                     if string == ')':
1994                         if not inside_group:
1995                             # ')' will be handled by the parentheses group
1996                             tokens.restore_last_token()
1997                         break
1998                     elif inside_merge and string in ['/', ',']:
1999                         tokens.restore_last_token()
2000                         break
2001                     elif inside_choice and string == ',':
2002                         tokens.restore_last_token()
2003                         break
2004                     elif string == ',':
2005                         if not current_selector:
2006                             raise syntax_error('"," must follow a format selector', start)
2007                         selectors.append(current_selector)
2008                         current_selector = None
2009                     elif string == '/':
2010                         if not current_selector:
2011                             raise syntax_error('"/" must follow a format selector', start)
2012                         first_choice = current_selector
2013                         second_choice = _parse_format_selection(tokens, inside_choice=True)
2014                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2015                     elif string == '[':
2016                         if not current_selector:
2017                             current_selector = FormatSelector(SINGLE, 'best', [])
2018                         format_filter = _parse_filter(tokens)
2019                         current_selector.filters.append(format_filter)
2020                     elif string == '(':
2021                         if current_selector:
2022                             raise syntax_error('Unexpected "("', start)
2023                         group = _parse_format_selection(tokens, inside_group=True)
2024                         current_selector = FormatSelector(GROUP, group, [])
2025                     elif string == '+':
2026                         if not current_selector:
2027                             raise syntax_error('Unexpected "+"', start)
2028                         selector_1 = current_selector
2029                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2030                         if not selector_2:
2031                             raise syntax_error('Expected a selector', start)
2032                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2033                     else:
2034                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2035                 elif type == tokenize.ENDMARKER:
2036                     break
2037             if current_selector:
2038                 selectors.append(current_selector)
2039             return selectors
2040
2041         def _merge(formats_pair):
2042             format_1, format_2 = formats_pair
2043
2044             formats_info = []
2045             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2046             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2047
2048             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2049                 get_no_more = {'video': False, 'audio': False}
2050                 for (i, fmt_info) in enumerate(formats_info):
2051                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2052                         formats_info.pop(i)
2053                         continue
2054                     for aud_vid in ['audio', 'video']:
2055                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2056                             if get_no_more[aud_vid]:
2057                                 formats_info.pop(i)
2058                                 break
2059                             get_no_more[aud_vid] = True
2060
2061             if len(formats_info) == 1:
2062                 return formats_info[0]
2063
2064             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2065             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2066
2067             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2068             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2069
2070             output_ext = self.params.get('merge_output_format')
2071             if not output_ext:
2072                 if the_only_video:
2073                     output_ext = the_only_video['ext']
2074                 elif the_only_audio and not video_fmts:
2075                     output_ext = the_only_audio['ext']
2076                 else:
2077                     output_ext = 'mkv'
2078
2079             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2080
2081             new_dict = {
2082                 'requested_formats': formats_info,
2083                 'format': '+'.join(filtered('format')),
2084                 'format_id': '+'.join(filtered('format_id')),
2085                 'ext': output_ext,
2086                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2087                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2088                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2089                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2090                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2091             }
2092
2093             if the_only_video:
2094                 new_dict.update({
2095                     'width': the_only_video.get('width'),
2096                     'height': the_only_video.get('height'),
2097                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2098                     'fps': the_only_video.get('fps'),
2099                     'dynamic_range': the_only_video.get('dynamic_range'),
2100                     'vcodec': the_only_video.get('vcodec'),
2101                     'vbr': the_only_video.get('vbr'),
2102                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2103                 })
2104
2105             if the_only_audio:
2106                 new_dict.update({
2107                     'acodec': the_only_audio.get('acodec'),
2108                     'abr': the_only_audio.get('abr'),
2109                     'asr': the_only_audio.get('asr'),
2110                 })
2111
2112             return new_dict
2113
2114         def _check_formats(formats):
2115             if not check_formats:
2116                 yield from formats
2117                 return
2118             yield from self._check_formats(formats)
2119
2120         def _build_selector_function(selector):
2121             if isinstance(selector, list):  # ,
2122                 fs = [_build_selector_function(s) for s in selector]
2123
2124                 def selector_function(ctx):
2125                     for f in fs:
2126                         yield from f(ctx)
2127                 return selector_function
2128
2129             elif selector.type == GROUP:  # ()
2130                 selector_function = _build_selector_function(selector.selector)
2131
2132             elif selector.type == PICKFIRST:  # /
2133                 fs = [_build_selector_function(s) for s in selector.selector]
2134
2135                 def selector_function(ctx):
2136                     for f in fs:
2137                         picked_formats = list(f(ctx))
2138                         if picked_formats:
2139                             return picked_formats
2140                     return []
2141
2142             elif selector.type == MERGE:  # +
2143                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2144
2145                 def selector_function(ctx):
2146                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2147                         yield _merge(pair)
2148
2149             elif selector.type == SINGLE:  # atom
2150                 format_spec = selector.selector or 'best'
2151
2152                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2153                 if format_spec == 'all':
2154                     def selector_function(ctx):
2155                         yield from _check_formats(ctx['formats'][::-1])
2156                 elif format_spec == 'mergeall':
2157                     def selector_function(ctx):
2158                         formats = list(_check_formats(ctx['formats']))
2159                         if not formats:
2160                             return
2161                         merged_format = formats[-1]
2162                         for f in formats[-2::-1]:
2163                             merged_format = _merge((merged_format, f))
2164                         yield merged_format
2165
2166                 else:
2167                     format_fallback, format_reverse, format_idx = False, True, 1
2168                     mobj = re.match(
2169                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2170                         format_spec)
2171                     if mobj is not None:
2172                         format_idx = int_or_none(mobj.group('n'), default=1)
2173                         format_reverse = mobj.group('bw')[0] == 'b'
2174                         format_type = (mobj.group('type') or [None])[0]
2175                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2176                         format_modified = mobj.group('mod') is not None
2177
2178                         format_fallback = not format_type and not format_modified  # for b, w
2179                         _filter_f = (
2180                             (lambda f: f.get('%scodec' % format_type) != 'none')
2181                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2182                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2183                             if format_type  # bv, ba, wv, wa
2184                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2185                             if not format_modified  # b, w
2186                             else lambda f: True)  # b*, w*
2187                         filter_f = lambda f: _filter_f(f) and (
2188                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2189                     else:
2190                         if format_spec in self._format_selection_exts['audio']:
2191                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2192                         elif format_spec in self._format_selection_exts['video']:
2193                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2194                         elif format_spec in self._format_selection_exts['storyboards']:
2195                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2196                         else:
2197                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2198
2199                     def selector_function(ctx):
2200                         formats = list(ctx['formats'])
2201                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2202                         if format_fallback and ctx['incomplete_formats'] and not matches:
2203                             # for extractors with incomplete formats (audio only (soundcloud)
2204                             # or video only (imgur)) best/worst will fallback to
2205                             # best/worst {video,audio}-only format
2206                             matches = formats
2207                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2208                         try:
2209                             yield matches[format_idx - 1]
2210                         except IndexError:
2211                             return
2212
2213             filters = [self._build_format_filter(f) for f in selector.filters]
2214
2215             def final_selector(ctx):
2216                 ctx_copy = dict(ctx)
2217                 for _filter in filters:
2218                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2219                 return selector_function(ctx_copy)
2220             return final_selector
2221
2222         stream = io.BytesIO(format_spec.encode('utf-8'))
2223         try:
2224             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2225         except tokenize.TokenError:
2226             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2227
2228         class TokenIterator(object):
2229             def __init__(self, tokens):
2230                 self.tokens = tokens
2231                 self.counter = 0
2232
2233             def __iter__(self):
2234                 return self
2235
2236             def __next__(self):
2237                 if self.counter >= len(self.tokens):
2238                     raise StopIteration()
2239                 value = self.tokens[self.counter]
2240                 self.counter += 1
2241                 return value
2242
2243             next = __next__
2244
2245             def restore_last_token(self):
2246                 self.counter -= 1
2247
2248         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2249         return _build_selector_function(parsed_selector)
2250
2251     def _calc_headers(self, info_dict):
2252         res = std_headers.copy()
2253         res.update(info_dict.get('http_headers') or {})
2254
2255         cookies = self._calc_cookies(info_dict)
2256         if cookies:
2257             res['Cookie'] = cookies
2258
2259         if 'X-Forwarded-For' not in res:
2260             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2261             if x_forwarded_for_ip:
2262                 res['X-Forwarded-For'] = x_forwarded_for_ip
2263
2264         return res
2265
2266     def _calc_cookies(self, info_dict):
2267         pr = sanitized_Request(info_dict['url'])
2268         self.cookiejar.add_cookie_header(pr)
2269         return pr.get_header('Cookie')
2270
2271     def _sort_thumbnails(self, thumbnails):
2272         thumbnails.sort(key=lambda t: (
2273             t.get('preference') if t.get('preference') is not None else -1,
2274             t.get('width') if t.get('width') is not None else -1,
2275             t.get('height') if t.get('height') is not None else -1,
2276             t.get('id') if t.get('id') is not None else '',
2277             t.get('url')))
2278
2279     def _sanitize_thumbnails(self, info_dict):
2280         thumbnails = info_dict.get('thumbnails')
2281         if thumbnails is None:
2282             thumbnail = info_dict.get('thumbnail')
2283             if thumbnail:
2284                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2285         if not thumbnails:
2286             return
2287
2288         def check_thumbnails(thumbnails):
2289             for t in thumbnails:
2290                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2291                 try:
2292                     self.urlopen(HEADRequest(t['url']))
2293                 except network_exceptions as err:
2294                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2295                     continue
2296                 yield t
2297
2298         self._sort_thumbnails(thumbnails)
2299         for i, t in enumerate(thumbnails):
2300             if t.get('id') is None:
2301                 t['id'] = '%d' % i
2302             if t.get('width') and t.get('height'):
2303                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2304             t['url'] = sanitize_url(t['url'])
2305
2306         if self.params.get('check_formats') is True:
2307             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2308         else:
2309             info_dict['thumbnails'] = thumbnails
2310
2311     def process_video_result(self, info_dict, download=True):
2312         assert info_dict.get('_type', 'video') == 'video'
2313         self._num_videos += 1
2314
2315         if 'id' not in info_dict:
2316             raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2317         elif not info_dict.get('id'):
2318             raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2319
2320         info_dict['fulltitle'] = info_dict.get('title')
2321         if 'title' not in info_dict:
2322             raise ExtractorError('Missing "title" field in extractor result',
2323                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2324         elif not info_dict.get('title'):
2325             self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2326             info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2327
2328         def report_force_conversion(field, field_not, conversion):
2329             self.report_warning(
2330                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2331                 % (field, field_not, conversion))
2332
2333         def sanitize_string_field(info, string_field):
2334             field = info.get(string_field)
2335             if field is None or isinstance(field, compat_str):
2336                 return
2337             report_force_conversion(string_field, 'a string', 'string')
2338             info[string_field] = compat_str(field)
2339
2340         def sanitize_numeric_fields(info):
2341             for numeric_field in self._NUMERIC_FIELDS:
2342                 field = info.get(numeric_field)
2343                 if field is None or isinstance(field, compat_numeric_types):
2344                     continue
2345                 report_force_conversion(numeric_field, 'numeric', 'int')
2346                 info[numeric_field] = int_or_none(field)
2347
2348         sanitize_string_field(info_dict, 'id')
2349         sanitize_numeric_fields(info_dict)
2350
2351         if 'playlist' not in info_dict:
2352             # It isn't part of a playlist
2353             info_dict['playlist'] = None
2354             info_dict['playlist_index'] = None
2355
2356         self._sanitize_thumbnails(info_dict)
2357
2358         thumbnail = info_dict.get('thumbnail')
2359         thumbnails = info_dict.get('thumbnails')
2360         if thumbnail:
2361             info_dict['thumbnail'] = sanitize_url(thumbnail)
2362         elif thumbnails:
2363             info_dict['thumbnail'] = thumbnails[-1]['url']
2364
2365         if info_dict.get('display_id') is None and 'id' in info_dict:
2366             info_dict['display_id'] = info_dict['id']
2367
2368         if info_dict.get('duration') is not None:
2369             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2370
2371         for ts_key, date_key in (
2372                 ('timestamp', 'upload_date'),
2373                 ('release_timestamp', 'release_date'),
2374                 ('modified_timestamp', 'modified_date'),
2375         ):
2376             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2377                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2378                 # see http://bugs.python.org/issue1646728)
2379                 try:
2380                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2381                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2382                 except (ValueError, OverflowError, OSError):
2383                     pass
2384
2385         live_keys = ('is_live', 'was_live')
2386         live_status = info_dict.get('live_status')
2387         if live_status is None:
2388             for key in live_keys:
2389                 if info_dict.get(key) is False:
2390                     continue
2391                 if info_dict.get(key):
2392                     live_status = key
2393                 break
2394             if all(info_dict.get(key) is False for key in live_keys):
2395                 live_status = 'not_live'
2396         if live_status:
2397             info_dict['live_status'] = live_status
2398             for key in live_keys:
2399                 if info_dict.get(key) is None:
2400                     info_dict[key] = (live_status == key)
2401
2402         # Auto generate title fields corresponding to the *_number fields when missing
2403         # in order to always have clean titles. This is very common for TV series.
2404         for field in ('chapter', 'season', 'episode'):
2405             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2406                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2407
2408         for cc_kind in ('subtitles', 'automatic_captions'):
2409             cc = info_dict.get(cc_kind)
2410             if cc:
2411                 for _, subtitle in cc.items():
2412                     for subtitle_format in subtitle:
2413                         if subtitle_format.get('url'):
2414                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2415                         if subtitle_format.get('ext') is None:
2416                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2417
2418         automatic_captions = info_dict.get('automatic_captions')
2419         subtitles = info_dict.get('subtitles')
2420
2421         info_dict['requested_subtitles'] = self.process_subtitles(
2422             info_dict['id'], subtitles, automatic_captions)
2423
2424         if info_dict.get('formats') is None:
2425             # There's only one format available
2426             formats = [info_dict]
2427         else:
2428             formats = info_dict['formats']
2429
2430         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2431         if not self.params.get('allow_unplayable_formats'):
2432             formats = [f for f in formats if not f.get('has_drm')]
2433
2434         if info_dict.get('is_live'):
2435             get_from_start = bool(self.params.get('live_from_start'))
2436             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2437             if not get_from_start:
2438                 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2439
2440         if not formats:
2441             self.raise_no_formats(info_dict)
2442
2443         def is_wellformed(f):
2444             url = f.get('url')
2445             if not url:
2446                 self.report_warning(
2447                     '"url" field is missing or empty - skipping format, '
2448                     'there is an error in extractor')
2449                 return False
2450             if isinstance(url, bytes):
2451                 sanitize_string_field(f, 'url')
2452             return True
2453
2454         # Filter out malformed formats for better extraction robustness
2455         formats = list(filter(is_wellformed, formats))
2456
2457         formats_dict = {}
2458
2459         # We check that all the formats have the format and format_id fields
2460         for i, format in enumerate(formats):
2461             sanitize_string_field(format, 'format_id')
2462             sanitize_numeric_fields(format)
2463             format['url'] = sanitize_url(format['url'])
2464             if not format.get('format_id'):
2465                 format['format_id'] = compat_str(i)
2466             else:
2467                 # Sanitize format_id from characters used in format selector expression
2468                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2469             format_id = format['format_id']
2470             if format_id not in formats_dict:
2471                 formats_dict[format_id] = []
2472             formats_dict[format_id].append(format)
2473
2474         # Make sure all formats have unique format_id
2475         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2476         for format_id, ambiguous_formats in formats_dict.items():
2477             ambigious_id = len(ambiguous_formats) > 1
2478             for i, format in enumerate(ambiguous_formats):
2479                 if ambigious_id:
2480                     format['format_id'] = '%s-%d' % (format_id, i)
2481                 if format.get('ext') is None:
2482                     format['ext'] = determine_ext(format['url']).lower()
2483                 # Ensure there is no conflict between id and ext in format selection
2484                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2485                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2486                     format['format_id'] = 'f%s' % format['format_id']
2487
2488         for i, format in enumerate(formats):
2489             if format.get('format') is None:
2490                 format['format'] = '{id} - {res}{note}'.format(
2491                     id=format['format_id'],
2492                     res=self.format_resolution(format),
2493                     note=format_field(format, 'format_note', ' (%s)'),
2494                 )
2495             if format.get('protocol') is None:
2496                 format['protocol'] = determine_protocol(format)
2497             if format.get('resolution') is None:
2498                 format['resolution'] = self.format_resolution(format, default=None)
2499             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2500                 format['dynamic_range'] = 'SDR'
2501             if (info_dict.get('duration') and format.get('tbr')
2502                     and not format.get('filesize') and not format.get('filesize_approx')):
2503                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2504
2505             # Add HTTP headers, so that external programs can use them from the
2506             # json output
2507             full_format_info = info_dict.copy()
2508             full_format_info.update(format)
2509             format['http_headers'] = self._calc_headers(full_format_info)
2510         # Remove private housekeeping stuff
2511         if '__x_forwarded_for_ip' in info_dict:
2512             del info_dict['__x_forwarded_for_ip']
2513
2514         # TODO Central sorting goes here
2515
2516         if self.params.get('check_formats') is True:
2517             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2518
2519         if not formats or formats[0] is not info_dict:
2520             # only set the 'formats' fields if the original info_dict list them
2521             # otherwise we end up with a circular reference, the first (and unique)
2522             # element in the 'formats' field in info_dict is info_dict itself,
2523             # which can't be exported to json
2524             info_dict['formats'] = formats
2525
2526         info_dict, _ = self.pre_process(info_dict)
2527
2528         # The pre-processors may have modified the formats
2529         formats = info_dict.get('formats', [info_dict])
2530
2531         list_only = self.params.get('simulate') is None and (
2532             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2533         interactive_format_selection = not list_only and self.format_selector == '-'
2534         if self.params.get('list_thumbnails'):
2535             self.list_thumbnails(info_dict)
2536         if self.params.get('listsubtitles'):
2537             if 'automatic_captions' in info_dict:
2538                 self.list_subtitles(
2539                     info_dict['id'], automatic_captions, 'automatic captions')
2540             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2541         if self.params.get('listformats') or interactive_format_selection:
2542             self.list_formats(info_dict)
2543         if list_only:
2544             # Without this printing, -F --print-json will not work
2545             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2546             return
2547
2548         format_selector = self.format_selector
2549         if format_selector is None:
2550             req_format = self._default_format_spec(info_dict, download=download)
2551             self.write_debug('Default format spec: %s' % req_format)
2552             format_selector = self.build_format_selector(req_format)
2553
2554         while True:
2555             if interactive_format_selection:
2556                 req_format = input(
2557                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2558                 try:
2559                     format_selector = self.build_format_selector(req_format)
2560                 except SyntaxError as err:
2561                     self.report_error(err, tb=False, is_error=False)
2562                     continue
2563
2564             # While in format selection we may need to have an access to the original
2565             # format set in order to calculate some metrics or do some processing.
2566             # For now we need to be able to guess whether original formats provided
2567             # by extractor are incomplete or not (i.e. whether extractor provides only
2568             # video-only or audio-only formats) for proper formats selection for
2569             # extractors with such incomplete formats (see
2570             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2571             # Since formats may be filtered during format selection and may not match
2572             # the original formats the results may be incorrect. Thus original formats
2573             # or pre-calculated metrics should be passed to format selection routines
2574             # as well.
2575             # We will pass a context object containing all necessary additional data
2576             # instead of just formats.
2577             # This fixes incorrect format selection issue (see
2578             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2579             incomplete_formats = (
2580                 # All formats are video-only or
2581                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2582                 # all formats are audio-only
2583                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2584
2585             ctx = {
2586                 'formats': formats,
2587                 'incomplete_formats': incomplete_formats,
2588             }
2589
2590             formats_to_download = list(format_selector(ctx))
2591             if interactive_format_selection and not formats_to_download:
2592                 self.report_error('Requested format is not available', tb=False, is_error=False)
2593                 continue
2594             break
2595
2596         if not formats_to_download:
2597             if not self.params.get('ignore_no_formats_error'):
2598                 raise ExtractorError('Requested format is not available', expected=True,
2599                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2600             self.report_warning('Requested format is not available')
2601             # Process what we can, even without any available formats.
2602             formats_to_download = [{}]
2603
2604         best_format = formats_to_download[-1]
2605         if download:
2606             if best_format:
2607                 self.to_screen(
2608                     f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2609                     + ', '.join([f['format_id'] for f in formats_to_download]))
2610             max_downloads_reached = False
2611             for i, fmt in enumerate(formats_to_download):
2612                 formats_to_download[i] = new_info = dict(info_dict)
2613                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2614                 new_info.update(fmt)
2615                 new_info['__original_infodict'] = info_dict
2616                 try:
2617                     self.process_info(new_info)
2618                 except MaxDownloadsReached:
2619                     max_downloads_reached = True
2620                 new_info.pop('__original_infodict')
2621                 # Remove copied info
2622                 for key, val in tuple(new_info.items()):
2623                     if info_dict.get(key) == val:
2624                         new_info.pop(key)
2625                 if max_downloads_reached:
2626                     break
2627
2628             write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2629             assert write_archive.issubset({True, False, 'ignore'})
2630             if True in write_archive and False not in write_archive:
2631                 self.record_download_archive(info_dict)
2632
2633             info_dict['requested_downloads'] = formats_to_download
2634             info_dict = self.run_all_pps('after_video', info_dict)
2635             if max_downloads_reached:
2636                 raise MaxDownloadsReached()
2637
2638         # We update the info dict with the selected best quality format (backwards compatibility)
2639         info_dict.update(best_format)
2640         return info_dict
2641
2642     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2643         """Select the requested subtitles and their format"""
2644         available_subs = {}
2645         if normal_subtitles and self.params.get('writesubtitles'):
2646             available_subs.update(normal_subtitles)
2647         if automatic_captions and self.params.get('writeautomaticsub'):
2648             for lang, cap_info in automatic_captions.items():
2649                 if lang not in available_subs:
2650                     available_subs[lang] = cap_info
2651
2652         if (not self.params.get('writesubtitles') and not
2653                 self.params.get('writeautomaticsub') or not
2654                 available_subs):
2655             return None
2656
2657         all_sub_langs = available_subs.keys()
2658         if self.params.get('allsubtitles', False):
2659             requested_langs = all_sub_langs
2660         elif self.params.get('subtitleslangs', False):
2661             # A list is used so that the order of languages will be the same as
2662             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2663             requested_langs = []
2664             for lang_re in self.params.get('subtitleslangs'):
2665                 if lang_re == 'all':
2666                     requested_langs.extend(all_sub_langs)
2667                     continue
2668                 discard = lang_re[0] == '-'
2669                 if discard:
2670                     lang_re = lang_re[1:]
2671                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2672                 if discard:
2673                     for lang in current_langs:
2674                         while lang in requested_langs:
2675                             requested_langs.remove(lang)
2676                 else:
2677                     requested_langs.extend(current_langs)
2678             requested_langs = orderedSet(requested_langs)
2679         elif 'en' in available_subs:
2680             requested_langs = ['en']
2681         else:
2682             requested_langs = [list(all_sub_langs)[0]]
2683         if requested_langs:
2684             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2685
2686         formats_query = self.params.get('subtitlesformat', 'best')
2687         formats_preference = formats_query.split('/') if formats_query else []
2688         subs = {}
2689         for lang in requested_langs:
2690             formats = available_subs.get(lang)
2691             if formats is None:
2692                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2693                 continue
2694             for ext in formats_preference:
2695                 if ext == 'best':
2696                     f = formats[-1]
2697                     break
2698                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2699                 if matches:
2700                     f = matches[-1]
2701                     break
2702             else:
2703                 f = formats[-1]
2704                 self.report_warning(
2705                     'No subtitle format found matching "%s" for language %s, '
2706                     'using %s' % (formats_query, lang, f['ext']))
2707             subs[lang] = f
2708         return subs
2709
2710     def _forceprint(self, key, info_dict):
2711         if info_dict is None:
2712             return
2713         info_copy = info_dict.copy()
2714         info_copy['formats_table'] = self.render_formats_table(info_dict)
2715         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2716         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2717         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2718
2719         def format_tmpl(tmpl):
2720             mobj = re.match(r'\w+(=?)$', tmpl)
2721             if mobj and mobj.group(1):
2722                 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2723             elif mobj:
2724                 return f'%({tmpl})s'
2725             return tmpl
2726
2727         for tmpl in self.params['forceprint'].get(key, []):
2728             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2729
2730         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2731             filename = self.evaluate_outtmpl(file_tmpl, info_dict)
2732             tmpl = format_tmpl(tmpl)
2733             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2734             with io.open(filename, 'a', encoding='utf-8') as f:
2735                 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2736
2737     def __forced_printings(self, info_dict, filename, incomplete):
2738         def print_mandatory(field, actual_field=None):
2739             if actual_field is None:
2740                 actual_field = field
2741             if (self.params.get('force%s' % field, False)
2742                     and (not incomplete or info_dict.get(actual_field) is not None)):
2743                 self.to_stdout(info_dict[actual_field])
2744
2745         def print_optional(field):
2746             if (self.params.get('force%s' % field, False)
2747                     and info_dict.get(field) is not None):
2748                 self.to_stdout(info_dict[field])
2749
2750         info_dict = info_dict.copy()
2751         if filename is not None:
2752             info_dict['filename'] = filename
2753         if info_dict.get('requested_formats') is not None:
2754             # For RTMP URLs, also include the playpath
2755             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2756         elif 'url' in info_dict:
2757             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2758
2759         if (self.params.get('forcejson')
2760                 or self.params['forceprint'].get('video')
2761                 or self.params['print_to_file'].get('video')):
2762             self.post_extract(info_dict)
2763         self._forceprint('video', info_dict)
2764
2765         print_mandatory('title')
2766         print_mandatory('id')
2767         print_mandatory('url', 'urls')
2768         print_optional('thumbnail')
2769         print_optional('description')
2770         print_optional('filename')
2771         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2772             self.to_stdout(formatSeconds(info_dict['duration']))
2773         print_mandatory('format')
2774
2775         if self.params.get('forcejson'):
2776             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2777
2778     def dl(self, name, info, subtitle=False, test=False):
2779         if not info.get('url'):
2780             self.raise_no_formats(info, True)
2781
2782         if test:
2783             verbose = self.params.get('verbose')
2784             params = {
2785                 'test': True,
2786                 'quiet': self.params.get('quiet') or not verbose,
2787                 'verbose': verbose,
2788                 'noprogress': not verbose,
2789                 'nopart': True,
2790                 'skip_unavailable_fragments': False,
2791                 'keep_fragments': False,
2792                 'overwrites': True,
2793                 '_no_ytdl_file': True,
2794             }
2795         else:
2796             params = self.params
2797         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2798         if not test:
2799             for ph in self._progress_hooks:
2800                 fd.add_progress_hook(ph)
2801             urls = '", "'.join(
2802                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2803                 for f in info.get('requested_formats', []) or [info])
2804             self.write_debug('Invoking downloader on "%s"' % urls)
2805
2806         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2807         # But it may contain objects that are not deep-copyable
2808         new_info = self._copy_infodict(info)
2809         if new_info.get('http_headers') is None:
2810             new_info['http_headers'] = self._calc_headers(new_info)
2811         return fd.download(name, new_info, subtitle)
2812
2813     def existing_file(self, filepaths, *, default_overwrite=True):
2814         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2815         if existing_files and not self.params.get('overwrites', default_overwrite):
2816             return existing_files[0]
2817
2818         for file in existing_files:
2819             self.report_file_delete(file)
2820             os.remove(file)
2821         return None
2822
2823     def process_info(self, info_dict):
2824         """Process a single resolved IE result. (Modified it in-place)"""
2825
2826         assert info_dict.get('_type', 'video') == 'video'
2827         original_infodict = info_dict
2828
2829         if 'format' not in info_dict and 'ext' in info_dict:
2830             info_dict['format'] = info_dict['ext']
2831
2832         if self._match_entry(info_dict) is not None:
2833             info_dict['__write_download_archive'] = 'ignore'
2834             return
2835
2836         self.post_extract(info_dict)
2837         self._num_downloads += 1
2838
2839         # info_dict['_filename'] needs to be set for backward compatibility
2840         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2841         temp_filename = self.prepare_filename(info_dict, 'temp')
2842         files_to_move = {}
2843
2844         # Forced printings
2845         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2846
2847         if self.params.get('simulate'):
2848             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2849             return
2850
2851         if full_filename is None:
2852             return
2853         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2854             return
2855         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2856             return
2857
2858         if self._write_description('video', info_dict,
2859                                    self.prepare_filename(info_dict, 'description')) is None:
2860             return
2861
2862         sub_files = self._write_subtitles(info_dict, temp_filename)
2863         if sub_files is None:
2864             return
2865         files_to_move.update(dict(sub_files))
2866
2867         thumb_files = self._write_thumbnails(
2868             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2869         if thumb_files is None:
2870             return
2871         files_to_move.update(dict(thumb_files))
2872
2873         infofn = self.prepare_filename(info_dict, 'infojson')
2874         _infojson_written = self._write_info_json('video', info_dict, infofn)
2875         if _infojson_written:
2876             info_dict['infojson_filename'] = infofn
2877             # For backward compatibility, even though it was a private field
2878             info_dict['__infojson_filename'] = infofn
2879         elif _infojson_written is None:
2880             return
2881
2882         # Note: Annotations are deprecated
2883         annofn = None
2884         if self.params.get('writeannotations', False):
2885             annofn = self.prepare_filename(info_dict, 'annotation')
2886         if annofn:
2887             if not self._ensure_dir_exists(encodeFilename(annofn)):
2888                 return
2889             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2890                 self.to_screen('[info] Video annotations are already present')
2891             elif not info_dict.get('annotations'):
2892                 self.report_warning('There are no annotations to write.')
2893             else:
2894                 try:
2895                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2896                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2897                         annofile.write(info_dict['annotations'])
2898                 except (KeyError, TypeError):
2899                     self.report_warning('There are no annotations to write.')
2900                 except (OSError, IOError):
2901                     self.report_error('Cannot write annotations file: ' + annofn)
2902                     return
2903
2904         # Write internet shortcut files
2905         def _write_link_file(link_type):
2906             if 'webpage_url' not in info_dict:
2907                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2908                 return False
2909             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2910             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2911                 return False
2912             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2913                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2914                 return True
2915             try:
2916                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2917                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2918                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2919                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2920                     if link_type == 'desktop':
2921                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2922                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2923             except (OSError, IOError):
2924                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2925                 return False
2926             return True
2927
2928         write_links = {
2929             'url': self.params.get('writeurllink'),
2930             'webloc': self.params.get('writewebloclink'),
2931             'desktop': self.params.get('writedesktoplink'),
2932         }
2933         if self.params.get('writelink'):
2934             link_type = ('webloc' if sys.platform == 'darwin'
2935                          else 'desktop' if sys.platform.startswith('linux')
2936                          else 'url')
2937             write_links[link_type] = True
2938
2939         if any(should_write and not _write_link_file(link_type)
2940                for link_type, should_write in write_links.items()):
2941             return
2942
2943         def replace_info_dict(new_info):
2944             nonlocal info_dict
2945             if new_info == info_dict:
2946                 return
2947             info_dict.clear()
2948             info_dict.update(new_info)
2949
2950         try:
2951             new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2952             replace_info_dict(new_info)
2953         except PostProcessingError as err:
2954             self.report_error('Preprocessing: %s' % str(err))
2955             return
2956
2957         if self.params.get('skip_download'):
2958             info_dict['filepath'] = temp_filename
2959             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2960             info_dict['__files_to_move'] = files_to_move
2961             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2962             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2963         else:
2964             # Download
2965             info_dict.setdefault('__postprocessors', [])
2966             try:
2967
2968                 def existing_video_file(*filepaths):
2969                     ext = info_dict.get('ext')
2970                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
2971                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
2972                                               default_overwrite=False)
2973                     if file:
2974                         info_dict['ext'] = os.path.splitext(file)[1][1:]
2975                     return file
2976
2977                 success = True
2978                 if info_dict.get('requested_formats') is not None:
2979
2980                     def compatible_formats(formats):
2981                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2982                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2983                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2984                         if len(video_formats) > 2 or len(audio_formats) > 2:
2985                             return False
2986
2987                         # Check extension
2988                         exts = set(format.get('ext') for format in formats)
2989                         COMPATIBLE_EXTS = (
2990                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2991                             set(('webm',)),
2992                         )
2993                         for ext_sets in COMPATIBLE_EXTS:
2994                             if ext_sets.issuperset(exts):
2995                                 return True
2996                         # TODO: Check acodec/vcodec
2997                         return False
2998
2999                     requested_formats = info_dict['requested_formats']
3000                     old_ext = info_dict['ext']
3001                     if self.params.get('merge_output_format') is None:
3002                         if not compatible_formats(requested_formats):
3003                             info_dict['ext'] = 'mkv'
3004                             self.report_warning(
3005                                 'Requested formats are incompatible for merge and will be merged into mkv')
3006                         if (info_dict['ext'] == 'webm'
3007                                 and info_dict.get('thumbnails')
3008                                 # check with type instead of pp_key, __name__, or isinstance
3009                                 # since we dont want any custom PPs to trigger this
3010                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3011                             info_dict['ext'] = 'mkv'
3012                             self.report_warning(
3013                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3014                     new_ext = info_dict['ext']
3015
3016                     def correct_ext(filename, ext=new_ext):
3017                         if filename == '-':
3018                             return filename
3019                         filename_real_ext = os.path.splitext(filename)[1][1:]
3020                         filename_wo_ext = (
3021                             os.path.splitext(filename)[0]
3022                             if filename_real_ext in (old_ext, new_ext)
3023                             else filename)
3024                         return '%s.%s' % (filename_wo_ext, ext)
3025
3026                     # Ensure filename always has a correct extension for successful merge
3027                     full_filename = correct_ext(full_filename)
3028                     temp_filename = correct_ext(temp_filename)
3029                     dl_filename = existing_video_file(full_filename, temp_filename)
3030                     info_dict['__real_download'] = False
3031
3032                     downloaded = []
3033                     merger = FFmpegMergerPP(self)
3034
3035                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3036                     if dl_filename is not None:
3037                         self.report_file_already_downloaded(dl_filename)
3038                     elif fd:
3039                         for f in requested_formats if fd != FFmpegFD else []:
3040                             f['filepath'] = fname = prepend_extension(
3041                                 correct_ext(temp_filename, info_dict['ext']),
3042                                 'f%s' % f['format_id'], info_dict['ext'])
3043                             downloaded.append(fname)
3044                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3045                         success, real_download = self.dl(temp_filename, info_dict)
3046                         info_dict['__real_download'] = real_download
3047                     else:
3048                         if self.params.get('allow_unplayable_formats'):
3049                             self.report_warning(
3050                                 'You have requested merging of multiple formats '
3051                                 'while also allowing unplayable formats to be downloaded. '
3052                                 'The formats won\'t be merged to prevent data corruption.')
3053                         elif not merger.available:
3054                             msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3055                             if not self.params.get('ignoreerrors'):
3056                                 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3057                                 return
3058                             self.report_warning(f'{msg}. The formats won\'t be merged')
3059
3060                         if temp_filename == '-':
3061                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3062                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3063                                       else 'but ffmpeg is not installed')
3064                             self.report_warning(
3065                                 f'You have requested downloading multiple formats to stdout {reason}. '
3066                                 'The formats will be streamed one after the other')
3067                             fname = temp_filename
3068                         for f in requested_formats:
3069                             new_info = dict(info_dict)
3070                             del new_info['requested_formats']
3071                             new_info.update(f)
3072                             if temp_filename != '-':
3073                                 fname = prepend_extension(
3074                                     correct_ext(temp_filename, new_info['ext']),
3075                                     'f%s' % f['format_id'], new_info['ext'])
3076                                 if not self._ensure_dir_exists(fname):
3077                                     return
3078                                 f['filepath'] = fname
3079                                 downloaded.append(fname)
3080                             partial_success, real_download = self.dl(fname, new_info)
3081                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3082                             success = success and partial_success
3083
3084                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3085                         info_dict['__postprocessors'].append(merger)
3086                         info_dict['__files_to_merge'] = downloaded
3087                         # Even if there were no downloads, it is being merged only now
3088                         info_dict['__real_download'] = True
3089                     else:
3090                         for file in downloaded:
3091                             files_to_move[file] = None
3092                 else:
3093                     # Just a single file
3094                     dl_filename = existing_video_file(full_filename, temp_filename)
3095                     if dl_filename is None or dl_filename == temp_filename:
3096                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3097                         # So we should try to resume the download
3098                         success, real_download = self.dl(temp_filename, info_dict)
3099                         info_dict['__real_download'] = real_download
3100                     else:
3101                         self.report_file_already_downloaded(dl_filename)
3102
3103                 dl_filename = dl_filename or temp_filename
3104                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3105
3106             except network_exceptions as err:
3107                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3108                 return
3109             except (OSError, IOError) as err:
3110                 raise UnavailableVideoError(err)
3111             except (ContentTooShortError, ) as err:
3112                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3113                 return
3114
3115             if success and full_filename != '-':
3116
3117                 def fixup():
3118                     do_fixup = True
3119                     fixup_policy = self.params.get('fixup')
3120                     vid = info_dict['id']
3121
3122                     if fixup_policy in ('ignore', 'never'):
3123                         return
3124                     elif fixup_policy == 'warn':
3125                         do_fixup = False
3126                     elif fixup_policy != 'force':
3127                         assert fixup_policy in ('detect_or_warn', None)
3128                         if not info_dict.get('__real_download'):
3129                             do_fixup = False
3130
3131                     def ffmpeg_fixup(cndn, msg, cls):
3132                         if not cndn:
3133                             return
3134                         if not do_fixup:
3135                             self.report_warning(f'{vid}: {msg}')
3136                             return
3137                         pp = cls(self)
3138                         if pp.available:
3139                             info_dict['__postprocessors'].append(pp)
3140                         else:
3141                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3142
3143                     stretched_ratio = info_dict.get('stretched_ratio')
3144                     ffmpeg_fixup(
3145                         stretched_ratio not in (1, None),
3146                         f'Non-uniform pixel ratio {stretched_ratio}',
3147                         FFmpegFixupStretchedPP)
3148
3149                     ffmpeg_fixup(
3150                         (info_dict.get('requested_formats') is None
3151                          and info_dict.get('container') == 'm4a_dash'
3152                          and info_dict.get('ext') == 'm4a'),
3153                         'writing DASH m4a. Only some players support this container',
3154                         FFmpegFixupM4aPP)
3155
3156                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3157                     downloader = downloader.__name__ if downloader else None
3158
3159                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3160                         ffmpeg_fixup(downloader == 'HlsFD',
3161                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3162                                      FFmpegFixupM3u8PP)
3163                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3164                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3165
3166                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3167                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3168
3169                 fixup()
3170                 try:
3171                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3172                 except PostProcessingError as err:
3173                     self.report_error('Postprocessing: %s' % str(err))
3174                     return
3175                 try:
3176                     for ph in self._post_hooks:
3177                         ph(info_dict['filepath'])
3178                 except Exception as err:
3179                     self.report_error('post hooks: %s' % str(err))
3180                     return
3181                 info_dict['__write_download_archive'] = True
3182
3183         if self.params.get('force_write_download_archive'):
3184             info_dict['__write_download_archive'] = True
3185
3186         # Make sure the info_dict was modified in-place
3187         assert info_dict is original_infodict
3188
3189         max_downloads = self.params.get('max_downloads')
3190         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3191             raise MaxDownloadsReached()
3192
3193     def __download_wrapper(self, func):
3194         @functools.wraps(func)
3195         def wrapper(*args, **kwargs):
3196             try:
3197                 res = func(*args, **kwargs)
3198             except UnavailableVideoError as e:
3199                 self.report_error(e)
3200             except MaxDownloadsReached as e:
3201                 self.to_screen(f'[info] {e}')
3202                 raise
3203             except DownloadCancelled as e:
3204                 self.to_screen(f'[info] {e}')
3205                 if not self.params.get('break_per_url'):
3206                     raise
3207             else:
3208                 if self.params.get('dump_single_json', False):
3209                     self.post_extract(res)
3210                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3211         return wrapper
3212
3213     def download(self, url_list):
3214         """Download a given list of URLs."""
3215         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3216         outtmpl = self.outtmpl_dict['default']
3217         if (len(url_list) > 1
3218                 and outtmpl != '-'
3219                 and '%' not in outtmpl
3220                 and self.params.get('max_downloads') != 1):
3221             raise SameFileError(outtmpl)
3222
3223         for url in url_list:
3224             self.__download_wrapper(self.extract_info)(
3225                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3226
3227         return self._download_retcode
3228
3229     def download_with_info_file(self, info_filename):
3230         with contextlib.closing(fileinput.FileInput(
3231                 [info_filename], mode='r',
3232                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3233             # FileInput doesn't have a read method, we can't call json.load
3234             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3235         try:
3236             self.__download_wrapper(self.process_ie_result)(info, download=True)
3237         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3238             if not isinstance(e, EntryNotInPlaylist):
3239                 self.to_stderr('\r')
3240             webpage_url = info.get('webpage_url')
3241             if webpage_url is not None:
3242                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3243                 return self.download([webpage_url])
3244             else:
3245                 raise
3246         return self._download_retcode
3247
3248     @staticmethod
3249     def sanitize_info(info_dict, remove_private_keys=False):
3250         ''' Sanitize the infodict for converting to json '''
3251         if info_dict is None:
3252             return info_dict
3253         info_dict.setdefault('epoch', int(time.time()))
3254         info_dict.setdefault('_type', 'video')
3255         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3256         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3257         if remove_private_keys:
3258             remove_keys |= {
3259                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3260                 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3261             }
3262             reject = lambda k, v: k not in keep_keys and (
3263                 k.startswith('_') or k in remove_keys or v is None)
3264         else:
3265             reject = lambda k, v: k in remove_keys
3266
3267         def filter_fn(obj):
3268             if isinstance(obj, dict):
3269                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3270             elif isinstance(obj, (list, tuple, set, LazyList)):
3271                 return list(map(filter_fn, obj))
3272             elif obj is None or isinstance(obj, (str, int, float, bool)):
3273                 return obj
3274             else:
3275                 return repr(obj)
3276
3277         return filter_fn(info_dict)
3278
3279     @staticmethod
3280     def filter_requested_info(info_dict, actually_filter=True):
3281         ''' Alias of sanitize_info for backward compatibility '''
3282         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3283
3284     @staticmethod
3285     def post_extract(info_dict):
3286         def actual_post_extract(info_dict):
3287             if info_dict.get('_type') in ('playlist', 'multi_video'):
3288                 for video_dict in info_dict.get('entries', {}):
3289                     actual_post_extract(video_dict or {})
3290                 return
3291
3292             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3293             extra = post_extractor().items()
3294             info_dict.update(extra)
3295             info_dict.pop('__post_extractor', None)
3296
3297             original_infodict = info_dict.get('__original_infodict') or {}
3298             original_infodict.update(extra)
3299             original_infodict.pop('__post_extractor', None)
3300
3301         actual_post_extract(info_dict or {})
3302
3303     def run_pp(self, pp, infodict):
3304         files_to_delete = []
3305         if '__files_to_move' not in infodict:
3306             infodict['__files_to_move'] = {}
3307         try:
3308             files_to_delete, infodict = pp.run(infodict)
3309         except PostProcessingError as e:
3310             # Must be True and not 'only_download'
3311             if self.params.get('ignoreerrors') is True:
3312                 self.report_error(e)
3313                 return infodict
3314             raise
3315
3316         if not files_to_delete:
3317             return infodict
3318         if self.params.get('keepvideo', False):
3319             for f in files_to_delete:
3320                 infodict['__files_to_move'].setdefault(f, '')
3321         else:
3322             for old_filename in set(files_to_delete):
3323                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3324                 try:
3325                     os.remove(encodeFilename(old_filename))
3326                 except (IOError, OSError):
3327                     self.report_warning('Unable to remove downloaded original file')
3328                 if old_filename in infodict['__files_to_move']:
3329                     del infodict['__files_to_move'][old_filename]
3330         return infodict
3331
3332     def run_all_pps(self, key, info, *, additional_pps=None):
3333         self._forceprint(key, info)
3334         for pp in (additional_pps or []) + self._pps[key]:
3335             info = self.run_pp(pp, info)
3336         return info
3337
3338     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3339         info = dict(ie_info)
3340         info['__files_to_move'] = files_to_move or {}
3341         info = self.run_all_pps(key, info)
3342         return info, info.pop('__files_to_move', None)
3343
3344     def post_process(self, filename, info, files_to_move=None):
3345         """Run all the postprocessors on the given file."""
3346         info['filepath'] = filename
3347         info['__files_to_move'] = files_to_move or {}
3348         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3349         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3350         del info['__files_to_move']
3351         return self.run_all_pps('after_move', info)
3352
3353     def _make_archive_id(self, info_dict):
3354         video_id = info_dict.get('id')
3355         if not video_id:
3356             return
3357         # Future-proof against any change in case
3358         # and backwards compatibility with prior versions
3359         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3360         if extractor is None:
3361             url = str_or_none(info_dict.get('url'))
3362             if not url:
3363                 return
3364             # Try to find matching extractor for the URL and take its ie_key
3365             for ie_key, ie in self._ies.items():
3366                 if ie.suitable(url):
3367                     extractor = ie_key
3368                     break
3369             else:
3370                 return
3371         return '%s %s' % (extractor.lower(), video_id)
3372
3373     def in_download_archive(self, info_dict):
3374         fn = self.params.get('download_archive')
3375         if fn is None:
3376             return False
3377
3378         vid_id = self._make_archive_id(info_dict)
3379         if not vid_id:
3380             return False  # Incomplete video information
3381
3382         return vid_id in self.archive
3383
3384     def record_download_archive(self, info_dict):
3385         fn = self.params.get('download_archive')
3386         if fn is None:
3387             return
3388         vid_id = self._make_archive_id(info_dict)
3389         assert vid_id
3390         self.write_debug(f'Adding to archive: {vid_id}')
3391         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3392             archive_file.write(vid_id + '\n')
3393         self.archive.add(vid_id)
3394
3395     @staticmethod
3396     def format_resolution(format, default='unknown'):
3397         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3398             return 'audio only'
3399         if format.get('resolution') is not None:
3400             return format['resolution']
3401         if format.get('width') and format.get('height'):
3402             return '%dx%d' % (format['width'], format['height'])
3403         elif format.get('height'):
3404             return '%sp' % format['height']
3405         elif format.get('width'):
3406             return '%dx?' % format['width']
3407         return default
3408
3409     def _list_format_headers(self, *headers):
3410         if self.params.get('listformats_table', True) is not False:
3411             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3412         return headers
3413
3414     def _format_note(self, fdict):
3415         res = ''
3416         if fdict.get('ext') in ['f4f', 'f4m']:
3417             res += '(unsupported)'
3418         if fdict.get('language'):
3419             if res:
3420                 res += ' '
3421             res += '[%s]' % fdict['language']
3422         if fdict.get('format_note') is not None:
3423             if res:
3424                 res += ' '
3425             res += fdict['format_note']
3426         if fdict.get('tbr') is not None:
3427             if res:
3428                 res += ', '
3429             res += '%4dk' % fdict['tbr']
3430         if fdict.get('container') is not None:
3431             if res:
3432                 res += ', '
3433             res += '%s container' % fdict['container']
3434         if (fdict.get('vcodec') is not None
3435                 and fdict.get('vcodec') != 'none'):
3436             if res:
3437                 res += ', '
3438             res += fdict['vcodec']
3439             if fdict.get('vbr') is not None:
3440                 res += '@'
3441         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3442             res += 'video@'
3443         if fdict.get('vbr') is not None:
3444             res += '%4dk' % fdict['vbr']
3445         if fdict.get('fps') is not None:
3446             if res:
3447                 res += ', '
3448             res += '%sfps' % fdict['fps']
3449         if fdict.get('acodec') is not None:
3450             if res:
3451                 res += ', '
3452             if fdict['acodec'] == 'none':
3453                 res += 'video only'
3454             else:
3455                 res += '%-5s' % fdict['acodec']
3456         elif fdict.get('abr') is not None:
3457             if res:
3458                 res += ', '
3459             res += 'audio'
3460         if fdict.get('abr') is not None:
3461             res += '@%3dk' % fdict['abr']
3462         if fdict.get('asr') is not None:
3463             res += ' (%5dHz)' % fdict['asr']
3464         if fdict.get('filesize') is not None:
3465             if res:
3466                 res += ', '
3467             res += format_bytes(fdict['filesize'])
3468         elif fdict.get('filesize_approx') is not None:
3469             if res:
3470                 res += ', '
3471             res += '~' + format_bytes(fdict['filesize_approx'])
3472         return res
3473
3474     def render_formats_table(self, info_dict):
3475         if not info_dict.get('formats') and not info_dict.get('url'):
3476             return None
3477
3478         formats = info_dict.get('formats', [info_dict])
3479         if not self.params.get('listformats_table', True) is not False:
3480             table = [
3481                 [
3482                     format_field(f, 'format_id'),
3483                     format_field(f, 'ext'),
3484                     self.format_resolution(f),
3485                     self._format_note(f)
3486                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3487             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3488
3489         delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3490         table = [
3491             [
3492                 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3493                 format_field(f, 'ext'),
3494                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3495                 format_field(f, 'fps', '\t%d'),
3496                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3497                 delim,
3498                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3499                 format_field(f, 'tbr', '\t%dk'),
3500                 shorten_protocol_name(f.get('protocol', '')),
3501                 delim,
3502                 format_field(f, 'vcodec', default='unknown').replace(
3503                     'none', 'images' if f.get('acodec') == 'none'
3504                             else self._format_screen('audio only', self.Styles.SUPPRESS)),
3505                 format_field(f, 'vbr', '\t%dk'),
3506                 format_field(f, 'acodec', default='unknown').replace(
3507                     'none', '' if f.get('vcodec') == 'none'
3508                             else self._format_screen('video only', self.Styles.SUPPRESS)),
3509                 format_field(f, 'abr', '\t%dk'),
3510                 format_field(f, 'asr', '\t%dHz'),
3511                 join_nonempty(
3512                     self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3513                     format_field(f, 'language', '[%s]'),
3514                     join_nonempty(format_field(f, 'format_note'),
3515                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3516                                   delim=', '),
3517                     delim=' '),
3518             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3519         header_line = self._list_format_headers(
3520             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3521             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3522
3523         return render_table(
3524             header_line, table, hide_empty=True,
3525             delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3526
3527     def render_thumbnails_table(self, info_dict):
3528         thumbnails = list(info_dict.get('thumbnails') or [])
3529         if not thumbnails:
3530             return None
3531         return render_table(
3532             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3533             [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3534
3535     def render_subtitles_table(self, video_id, subtitles):
3536         def _row(lang, formats):
3537             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3538             if len(set(names)) == 1:
3539                 names = [] if names[0] == 'unknown' else names[:1]
3540             return [lang, ', '.join(names), ', '.join(exts)]
3541
3542         if not subtitles:
3543             return None
3544         return render_table(
3545             self._list_format_headers('Language', 'Name', 'Formats'),
3546             [_row(lang, formats) for lang, formats in subtitles.items()],
3547             hide_empty=True)
3548
3549     def __list_table(self, video_id, name, func, *args):
3550         table = func(*args)
3551         if not table:
3552             self.to_screen(f'{video_id} has no {name}')
3553             return
3554         self.to_screen(f'[info] Available {name} for {video_id}:')
3555         self.to_stdout(table)
3556
3557     def list_formats(self, info_dict):
3558         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3559
3560     def list_thumbnails(self, info_dict):
3561         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3562
3563     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3564         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3565
3566     def urlopen(self, req):
3567         """ Start an HTTP download """
3568         if isinstance(req, compat_basestring):
3569             req = sanitized_Request(req)
3570         return self._opener.open(req, timeout=self._socket_timeout)
3571
3572     def print_debug_header(self):
3573         if not self.params.get('verbose'):
3574             return
3575
3576         def get_encoding(stream):
3577             ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3578             if not supports_terminal_sequences(stream):
3579                 from .compat import WINDOWS_VT_MODE
3580                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3581             return ret
3582
3583         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3584             locale.getpreferredencoding(),
3585             sys.getfilesystemencoding(),
3586             get_encoding(self._screen_file), get_encoding(self._err_file),
3587             self.get_encoding())
3588
3589         logger = self.params.get('logger')
3590         if logger:
3591             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3592             write_debug(encoding_str)
3593         else:
3594             write_string(f'[debug] {encoding_str}\n', encoding=None)
3595             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3596
3597         source = detect_variant()
3598         write_debug(join_nonempty(
3599             'yt-dlp version', __version__,
3600             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3601             '' if source == 'unknown' else f'({source})',
3602             delim=' '))
3603         if not _LAZY_LOADER:
3604             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3605                 write_debug('Lazy loading extractors is forcibly disabled')
3606             else:
3607                 write_debug('Lazy loading extractors is disabled')
3608         if plugin_extractors or plugin_postprocessors:
3609             write_debug('Plugins: %s' % [
3610                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3611                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3612         if self.params.get('compat_opts'):
3613             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3614
3615         if source == 'source':
3616             try:
3617                 sp = Popen(
3618                     ['git', 'rev-parse', '--short', 'HEAD'],
3619                     stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3620                     cwd=os.path.dirname(os.path.abspath(__file__)))
3621                 out, err = sp.communicate_or_kill()
3622                 out = out.decode().strip()
3623                 if re.match('[0-9a-f]+', out):
3624                     write_debug('Git HEAD: %s' % out)
3625             except Exception:
3626                 try:
3627                     sys.exc_clear()
3628                 except Exception:
3629                     pass
3630
3631         def python_implementation():
3632             impl_name = platform.python_implementation()
3633             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3634                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3635             return impl_name
3636
3637         write_debug('Python version %s (%s %s) - %s' % (
3638             platform.python_version(),
3639             python_implementation(),
3640             platform.architecture()[0],
3641             platform_name()))
3642
3643         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3644         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3645         if ffmpeg_features:
3646             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3647
3648         exe_versions['rtmpdump'] = rtmpdump_version()
3649         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3650         exe_str = ', '.join(
3651             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3652         ) or 'none'
3653         write_debug('exe versions: %s' % exe_str)
3654
3655         from .downloader.websocket import has_websockets
3656         from .postprocessor.embedthumbnail import has_mutagen
3657         from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
3658
3659         lib_str = join_nonempty(
3660             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3661             SECRETSTORAGE_AVAILABLE and 'secretstorage',
3662             has_mutagen and 'mutagen',
3663             SQLITE_AVAILABLE and 'sqlite',
3664             has_websockets and 'websockets',
3665             delim=', ') or 'none'
3666         write_debug('Optional libraries: %s' % lib_str)
3667
3668         proxy_map = {}
3669         for handler in self._opener.handlers:
3670             if hasattr(handler, 'proxies'):
3671                 proxy_map.update(handler.proxies)
3672         write_debug(f'Proxy map: {proxy_map}')
3673
3674         # Not implemented
3675         if False and self.params.get('call_home'):
3676             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3677             write_debug('Public IP address: %s' % ipaddr)
3678             latest_version = self.urlopen(
3679                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3680             if version_tuple(latest_version) > version_tuple(__version__):
3681                 self.report_warning(
3682                     'You are using an outdated version (newest version: %s)! '
3683                     'See https://yt-dl.org/update if you need help updating.' %
3684                     latest_version)
3685
3686     def _setup_opener(self):
3687         timeout_val = self.params.get('socket_timeout')
3688         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3689
3690         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3691         opts_cookiefile = self.params.get('cookiefile')
3692         opts_proxy = self.params.get('proxy')
3693
3694         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3695
3696         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3697         if opts_proxy is not None:
3698             if opts_proxy == '':
3699                 proxies = {}
3700             else:
3701                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3702         else:
3703             proxies = compat_urllib_request.getproxies()
3704             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3705             if 'http' in proxies and 'https' not in proxies:
3706                 proxies['https'] = proxies['http']
3707         proxy_handler = PerRequestProxyHandler(proxies)
3708
3709         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3710         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3711         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3712         redirect_handler = YoutubeDLRedirectHandler()
3713         data_handler = compat_urllib_request_DataHandler()
3714
3715         # When passing our own FileHandler instance, build_opener won't add the
3716         # default FileHandler and allows us to disable the file protocol, which
3717         # can be used for malicious purposes (see
3718         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3719         file_handler = compat_urllib_request.FileHandler()
3720
3721         def file_open(*args, **kwargs):
3722             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3723         file_handler.file_open = file_open
3724
3725         opener = compat_urllib_request.build_opener(
3726             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3727
3728         # Delete the default user-agent header, which would otherwise apply in
3729         # cases where our custom HTTP handler doesn't come into play
3730         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3731         opener.addheaders = []
3732         self._opener = opener
3733
3734     def encode(self, s):
3735         if isinstance(s, bytes):
3736             return s  # Already encoded
3737
3738         try:
3739             return s.encode(self.get_encoding())
3740         except UnicodeEncodeError as err:
3741             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3742             raise
3743
3744     def get_encoding(self):
3745         encoding = self.params.get('encoding')
3746         if encoding is None:
3747             encoding = preferredencoding()
3748         return encoding
3749
3750     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3751         ''' Write infojson and returns True = written, False = skip, None = error '''
3752         if overwrite is None:
3753             overwrite = self.params.get('overwrites', True)
3754         if not self.params.get('writeinfojson'):
3755             return False
3756         elif not infofn:
3757             self.write_debug(f'Skipping writing {label} infojson')
3758             return False
3759         elif not self._ensure_dir_exists(infofn):
3760             return None
3761         elif not overwrite and os.path.exists(infofn):
3762             self.to_screen(f'[info] {label.title()} metadata is already present')
3763         else:
3764             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3765             try:
3766                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3767             except (OSError, IOError):
3768                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3769                 return None
3770         return True
3771
3772     def _write_description(self, label, ie_result, descfn):
3773         ''' Write description and returns True = written, False = skip, None = error '''
3774         if not self.params.get('writedescription'):
3775             return False
3776         elif not descfn:
3777             self.write_debug(f'Skipping writing {label} description')
3778             return False
3779         elif not self._ensure_dir_exists(descfn):
3780             return None
3781         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3782             self.to_screen(f'[info] {label.title()} description is already present')
3783         elif ie_result.get('description') is None:
3784             self.report_warning(f'There\'s no {label} description to write')
3785             return False
3786         else:
3787             try:
3788                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3789                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3790                     descfile.write(ie_result['description'])
3791             except (OSError, IOError):
3792                 self.report_error(f'Cannot write {label} description file {descfn}')
3793                 return None
3794         return True
3795
3796     def _write_subtitles(self, info_dict, filename):
3797         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3798         ret = []
3799         subtitles = info_dict.get('requested_subtitles')
3800         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3801             # subtitles download errors are already managed as troubles in relevant IE
3802             # that way it will silently go on when used with unsupporting IE
3803             return ret
3804
3805         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3806         if not sub_filename_base:
3807             self.to_screen('[info] Skipping writing video subtitles')
3808             return ret
3809         for sub_lang, sub_info in subtitles.items():
3810             sub_format = sub_info['ext']
3811             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3812             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3813             existing_sub = self.existing_file((sub_filename_final, sub_filename))
3814             if existing_sub:
3815                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3816                 sub_info['filepath'] = existing_sub
3817                 ret.append((existing_sub, sub_filename_final))
3818                 continue
3819
3820             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3821             if sub_info.get('data') is not None:
3822                 try:
3823                     # Use newline='' to prevent conversion of newline characters
3824                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3825                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3826                         subfile.write(sub_info['data'])
3827                     sub_info['filepath'] = sub_filename
3828                     ret.append((sub_filename, sub_filename_final))
3829                     continue
3830                 except (OSError, IOError):
3831                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3832                     return None
3833
3834             try:
3835                 sub_copy = sub_info.copy()
3836                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3837                 self.dl(sub_filename, sub_copy, subtitle=True)
3838                 sub_info['filepath'] = sub_filename
3839                 ret.append((sub_filename, sub_filename_final))
3840             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3841                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3842                     raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
3843                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3844         return ret
3845
3846     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3847         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3848         write_all = self.params.get('write_all_thumbnails', False)
3849         thumbnails, ret = [], []
3850         if write_all or self.params.get('writethumbnail', False):
3851             thumbnails = info_dict.get('thumbnails') or []
3852         multiple = write_all and len(thumbnails) > 1
3853
3854         if thumb_filename_base is None:
3855             thumb_filename_base = filename
3856         if thumbnails and not thumb_filename_base:
3857             self.write_debug(f'Skipping writing {label} thumbnail')
3858             return ret
3859
3860         for idx, t in list(enumerate(thumbnails))[::-1]:
3861             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3862             thumb_display_id = f'{label} thumbnail {t["id"]}'
3863             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3864             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3865
3866             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3867             if existing_thumb:
3868                 self.to_screen('[info] %s is already present' % (
3869                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3870                 t['filepath'] = existing_thumb
3871                 ret.append((existing_thumb, thumb_filename_final))
3872             else:
3873                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3874                 try:
3875                     uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3876                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3877                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3878                         shutil.copyfileobj(uf, thumbf)
3879                     ret.append((thumb_filename, thumb_filename_final))
3880                     t['filepath'] = thumb_filename
3881                 except network_exceptions as err:
3882                     thumbnails.pop(idx)
3883                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3884             if ret and not write_all:
3885                 break
3886         return ret