yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_brotli,
  36     compat_get_terminal_size,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_pycrypto_AES,
  41     compat_shlex_quote,
  42     compat_str,
  43     compat_tokenize_tokenize,
  44     compat_urllib_error,
  45     compat_urllib_request,
  46     compat_urllib_request_DataHandler,
  47     windows_enable_vt_mode,
  48 )
  49 from .cookies import load_cookies
  50 from .utils import (
  51     age_restricted,
  52     args_to_str,
  53     ContentTooShortError,
  54     date_from_str,
  55     DateRange,
  56     DEFAULT_OUTTMPL,
  57     determine_ext,
  58     determine_protocol,
  59     DownloadCancelled,
  60     DownloadError,
  61     encode_compat_str,
  62     encodeFilename,
  63     EntryNotInPlaylist,
  64     error_to_compat_str,
  65     ExistingVideoReached,
  66     expand_path,
  67     ExtractorError,
  68     float_or_none,
  69     format_bytes,
  70     format_field,
  71     format_decimal_suffix,
  72     formatSeconds,
  73     GeoRestrictedError,
  74     get_domain,
  75     has_certifi,
  76     HEADRequest,
  77     InAdvancePagedList,
  78     int_or_none,
  79     iri_to_uri,
  80     ISO3166Utils,
  81     join_nonempty,
  82     LazyList,
  83     LINK_TEMPLATES,
  84     locked_file,
  85     make_dir,
  86     make_HTTPS_handler,
  87     MaxDownloadsReached,
  88     merge_headers,
  89     network_exceptions,
  90     number_of_digits,
  91     orderedSet,
  92     OUTTMPL_TYPES,
  93     PagedList,
  94     parse_filesize,
  95     PerRequestProxyHandler,
  96     platform_name,
  97     Popen,
  98     POSTPROCESS_WHEN,
  99     PostProcessingError,
 100     preferredencoding,
 101     prepend_extension,
 102     ReExtractInfo,
 103     register_socks_protocols,
 104     RejectedVideoReached,
 105     remove_terminal_sequences,
 106     render_table,
 107     replace_extension,
 108     SameFileError,
 109     sanitize_filename,
 110     sanitize_path,
 111     sanitize_url,
 112     sanitized_Request,
 113     std_headers,
 114     STR_FORMAT_RE_TMPL,
 115     STR_FORMAT_TYPES,
 116     str_or_none,
 117     strftime_or_none,
 118     subtitles_filename,
 119     supports_terminal_sequences,
 120     timetuple_from_msec,
 121     to_high_limit_path,
 122     traverse_obj,
 123     try_get,
 124     UnavailableVideoError,
 125     url_basename,
 126     variadic,
 127     version_tuple,
 128     write_json_file,
 129     write_string,
 130     YoutubeDLCookieProcessor,
 131     YoutubeDLHandler,
 132     YoutubeDLRedirectHandler,
 133 )
 134 from .cache import Cache
 135 from .minicurses import format_text
 136 from .extractor import (
 137     gen_extractor_classes,
 138     get_info_extractor,
 139     _LAZY_LOADER,
 140     _PLUGIN_CLASSES as plugin_extractors
 141 )
 142 from .extractor.openload import PhantomJSwrapper
 143 from .downloader import (
 144     FFmpegFD,
 145     get_suitable_downloader,
 146     shorten_protocol_name
 147 )
 148 from .downloader.rtmp import rtmpdump_version
 149 from .postprocessor import (
 150     get_postprocessor,
 151     EmbedThumbnailPP,
 152     FFmpegFixupDuplicateMoovPP,
 153     FFmpegFixupDurationPP,
 154     FFmpegFixupM3u8PP,
 155     FFmpegFixupM4aPP,
 156     FFmpegFixupStretchedPP,
 157     FFmpegFixupTimestampPP,
 158     FFmpegMergerPP,
 159     FFmpegPostProcessor,
 160     MoveFilesAfterDownloadPP,
 161     _PLUGIN_CLASSES as plugin_postprocessors
 162 )
 163 from .update import detect_variant
 164 from .version import __version__, RELEASE_GIT_HEAD
 165
 166 if compat_os_name == 'nt':
 167     import ctypes
 168
 169
 170 class YoutubeDL(object):
 171     """YoutubeDL class.
 172
 173     YoutubeDL objects are the ones responsible for downloading the
 174     actual video file and writing it to disk if the user has requested
 175     it, among some other tasks. In most cases there should be one per
 176     program. As, given a video URL, the downloader doesn't know how to
 177     extract all the needed information, task that InfoExtractors do, it
 178     has to pass the URL to one of them.
 179
 180     For this, YoutubeDL objects have a method that allows
 181     InfoExtractors to be registered in a given order. When it is passed
 182     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 183     finds that reports being able to handle it. The InfoExtractor extracts
 184     all the information about the video or videos the URL refers to, and
 185     YoutubeDL processes the extracted information, possibly using a File
 186     Downloader to download the video.
 187
 188     YoutubeDL objects accept a lot of parameters. In order not to saturate
 189     the object constructor with arguments, it receives a dictionary of
 190     options instead. These options are available through the params
 191     attribute for the InfoExtractors to use. The YoutubeDL also
 192     registers itself as the downloader in charge for the InfoExtractors
 193     that are added to it, so this is a "mutual registration".
 194
 195     Available options:
 196
 197     username:          Username for authentication purposes.
 198     password:          Password for authentication purposes.
 199     videopassword:     Password for accessing a video.
 200     ap_mso:            Adobe Pass multiple-system operator identifier.
 201     ap_username:       Multiple-system operator account username.
 202     ap_password:       Multiple-system operator account password.
 203     usenetrc:          Use netrc for authentication instead.
 204     verbose:           Print additional info to stdout.
 205     quiet:             Do not print messages to stdout.
 206     no_warnings:       Do not print out anything for warnings.
 207     forceprint:        A dict with keys WHEN mapped to a list of templates to
 208                        print to stdout. The allowed keys are video or any of the
 209                        items in utils.POSTPROCESS_WHEN.
 210                        For compatibility, a single list is also accepted
 211     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 212                        a list of tuples with (template, filename)
 213     forceurl:          Force printing final URL. (Deprecated)
 214     forcetitle:        Force printing title. (Deprecated)
 215     forceid:           Force printing ID. (Deprecated)
 216     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 217     forcedescription:  Force printing description. (Deprecated)
 218     forcefilename:     Force printing final filename. (Deprecated)
 219     forceduration:     Force printing duration. (Deprecated)
 220     forcejson:         Force printing info_dict as JSON.
 221     dump_single_json:  Force printing the info_dict of the whole playlist
 222                        (or video) as a single JSON line.
 223     force_write_download_archive: Force writing download archive regardless
 224                        of 'skip_download' or 'simulate'.
 225     simulate:          Do not download the video files. If unset (or None),
 226                        simulate only if listsubtitles, listformats or list_thumbnails is used
 227     format:            Video format code. see "FORMAT SELECTION" for more details.
 228                        You can also pass a function. The function takes 'ctx' as
 229                        argument and returns the formats to download.
 230                        See "build_format_selector" for an implementation
 231     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 232     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 233                        extracting metadata even if the video is not actually
 234                        available for download (experimental)
 235     format_sort:       A list of fields by which to sort the video formats.
 236                        See "Sorting Formats" for more details.
 237     format_sort_force: Force the given format_sort. see "Sorting Formats"
 238                        for more details.
 239     prefer_free_formats: Whether to prefer video formats with free containers
 240                        over non-free ones of same quality.
 241     allow_multiple_video_streams:   Allow multiple video streams to be merged
 242                        into a single file
 243     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 244                        into a single file
 245     check_formats:     Whether to test if the formats are downloadable.
 246                        Can be True (check all), False (check none),
 247                        'selected' (check selected formats),
 248                        or None (check only if requested by extractor)
 249     paths:             Dictionary of output paths. The allowed keys are 'home'
 250                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 251     outtmpl:           Dictionary of templates for output names. Allowed keys
 252                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 253                        For compatibility with youtube-dl, a single string can also be used
 254     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 255     restrictfilenames: Do not allow "&" and spaces in file names
 256     trim_file_name:    Limit length of filename (extension excluded)
 257     windowsfilenames:  Force the filenames to be windows compatible
 258     ignoreerrors:      Do not stop on download/postprocessing errors.
 259                        Can be 'only_download' to ignore only download errors.
 260                        Default is 'only_download' for CLI, but False for API
 261     skip_playlist_after_errors: Number of allowed failures until the rest of
 262                        the playlist is skipped
 263     force_generic_extractor: Force downloader to use the generic extractor
 264     overwrites:        Overwrite all video and metadata files if True,
 265                        overwrite only non-video files if None
 266                        and don't overwrite any file if False
 267                        For compatibility with youtube-dl,
 268                        "nooverwrites" may also be used instead
 269     playliststart:     Playlist item to start at.
 270     playlistend:       Playlist item to end at.
 271     playlist_items:    Specific indices of playlist to download.
 272     playlistreverse:   Download playlist items in reverse order.
 273     playlistrandom:    Download playlist items in random order.
 274     matchtitle:        Download only matching titles.
 275     rejecttitle:       Reject downloads for matching titles.
 276     logger:            Log messages to a logging.Logger instance.
 277     logtostderr:       Log messages to stderr instead of stdout.
 278     consoletitle:      Display progress in console window's titlebar.
 279     writedescription:  Write the video description to a .description file
 280     writeinfojson:     Write the video metadata to a .info.json file
 281     clean_infojson:    Remove private fields from the infojson
 282     getcomments:       Extract video comments. This will not be written to disk
 283                        unless writeinfojson is also given
 284     writeannotations:  Write the video annotations to a .annotations.xml file
 285     writethumbnail:    Write the thumbnail image to a file
 286     allow_playlist_files: Whether to write playlists' description, infojson etc
 287                        also to disk when using the 'write*' options
 288     write_all_thumbnails:  Write all thumbnail formats to files
 289     writelink:         Write an internet shortcut file, depending on the
 290                        current platform (.url/.webloc/.desktop)
 291     writeurllink:      Write a Windows internet shortcut file (.url)
 292     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 293     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 294     writesubtitles:    Write the video subtitles to a file
 295     writeautomaticsub: Write the automatically generated subtitles to a file
 296     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 297                        Downloads all the subtitles of the video
 298                        (requires writesubtitles or writeautomaticsub)
 299     listsubtitles:     Lists all available subtitles for the video
 300     subtitlesformat:   The format code for subtitles
 301     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 302                        The list may contain "all" to refer to all the available
 303                        subtitles. The language can be prefixed with a "-" to
 304                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 305     keepvideo:         Keep the video file after post-processing
 306     daterange:         A DateRange object, download only if the upload_date is in the range.
 307     skip_download:     Skip the actual download of the video file
 308     cachedir:          Location of the cache files in the filesystem.
 309                        False to disable filesystem cache.
 310     noplaylist:        Download single video instead of a playlist if in doubt.
 311     age_limit:         An integer representing the user's age in years.
 312                        Unsuitable videos for the given age are skipped.
 313     min_views:         An integer representing the minimum view count the video
 314                        must have in order to not be skipped.
 315                        Videos without view count information are always
 316                        downloaded. None for no limit.
 317     max_views:         An integer representing the maximum view count.
 318                        Videos that are more popular than that are not
 319                        downloaded.
 320                        Videos without view count information are always
 321                        downloaded. None for no limit.
 322     download_archive:  File name of a file where all downloads are recorded.
 323                        Videos already present in the file are not downloaded
 324                        again.
 325     break_on_existing: Stop the download process after attempting to download a
 326                        file that is in the archive.
 327     break_on_reject:   Stop the download process when encountering a video that
 328                        has been filtered out.
 329     break_per_url:     Whether break_on_reject and break_on_existing
 330                        should act on each input URL as opposed to for the entire queue
 331     cookiefile:        File name where cookies should be read from and dumped to
 332     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 333                        name/path from where cookies are loaded, and the name of the
 334                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 335     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 336                        support RFC 5746 secure renegotiation
 337     nocheckcertificate:  Do not verify SSL certificates
 338     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 339                        At the moment, this is only supported by YouTube.
 340     http_headers:      A dictionary of custom headers to be used for all requests
 341     proxy:             URL of the proxy server to use
 342     geo_verification_proxy:  URL of the proxy to use for IP address verification
 343                        on geo-restricted sites.
 344     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 345     bidi_workaround:   Work around buggy terminals without bidirectional text
 346                        support, using fribidi
 347     debug_printtraffic:Print out sent and received HTTP traffic
 348     include_ads:       Download ads as well (deprecated)
 349     default_search:    Prepend this string if an input url is not valid.
 350                        'auto' for elaborate guessing
 351     encoding:          Use this encoding instead of the system-specified.
 352     extract_flat:      Do not resolve URLs, return the immediate result.
 353                        Pass in 'in_playlist' to only show this behavior for
 354                        playlist items.
 355     wait_for_video:    If given, wait for scheduled streams to become available.
 356                        The value should be a tuple containing the range
 357                        (min_secs, max_secs) to wait between retries
 358     postprocessors:    A list of dictionaries, each with an entry
 359                        * key:  The name of the postprocessor. See
 360                                yt_dlp/postprocessor/__init__.py for a list.
 361                        * when: When to run the postprocessor. Allowed values are
 362                                the entries of utils.POSTPROCESS_WHEN
 363                                Assumed to be 'post_process' if not given
 364     post_hooks:        Deprecated - Register a custom postprocessor instead
 365                        A list of functions that get called as the final step
 366                        for each video file, after all postprocessors have been
 367                        called. The filename will be passed as the only argument.
 368     progress_hooks:    A list of functions that get called on download
 369                        progress, with a dictionary with the entries
 370                        * status: One of "downloading", "error", or "finished".
 371                                  Check this first and ignore unknown values.
 372                        * info_dict: The extracted info_dict
 373
 374                        If status is one of "downloading" or "finished", the
 375                        following properties may also be present:
 376                        * filename: The final filename (always present)
 377                        * tmpfilename: The filename we're currently writing to
 378                        * downloaded_bytes: Bytes on disk
 379                        * total_bytes: Size of the whole file, None if unknown
 380                        * total_bytes_estimate: Guess of the eventual file size,
 381                                                None if unavailable.
 382                        * elapsed: The number of seconds since download started.
 383                        * eta: The estimated time in seconds, None if unknown
 384                        * speed: The download speed in bytes/second, None if
 385                                 unknown
 386                        * fragment_index: The counter of the currently
 387                                          downloaded video fragment.
 388                        * fragment_count: The number of fragments (= individual
 389                                          files that will be merged)
 390
 391                        Progress hooks are guaranteed to be called at least once
 392                        (with status "finished") if the download is successful.
 393     postprocessor_hooks:  A list of functions that get called on postprocessing
 394                        progress, with a dictionary with the entries
 395                        * status: One of "started", "processing", or "finished".
 396                                  Check this first and ignore unknown values.
 397                        * postprocessor: Name of the postprocessor
 398                        * info_dict: The extracted info_dict
 399
 400                        Postprocessor hooks are guaranteed to be called at least twice
 401                        (with status "started" and "finished") if the processing is successful.
 402     merge_output_format: Extension to use when merging formats.
 403     final_ext:         Expected final extension; used to detect when the file was
 404                        already downloaded and converted
 405     fixup:             Automatically correct known faults of the file.
 406                        One of:
 407                        - "never": do nothing
 408                        - "warn": only emit a warning
 409                        - "detect_or_warn": check whether we can do anything
 410                                            about it, warn otherwise (default)
 411     source_address:    Client-side IP address to bind to.
 412     call_home:         Boolean, true iff we are allowed to contact the
 413                        yt-dlp servers for debugging. (BROKEN)
 414     sleep_interval_requests: Number of seconds to sleep between requests
 415                        during extraction
 416     sleep_interval:    Number of seconds to sleep before each download when
 417                        used alone or a lower bound of a range for randomized
 418                        sleep before each download (minimum possible number
 419                        of seconds to sleep) when used along with
 420                        max_sleep_interval.
 421     max_sleep_interval:Upper bound of a range for randomized sleep before each
 422                        download (maximum possible number of seconds to sleep).
 423                        Must only be used along with sleep_interval.
 424                        Actual sleep time will be a random float from range
 425                        [sleep_interval; max_sleep_interval].
 426     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 427     listformats:       Print an overview of available video formats and exit.
 428     list_thumbnails:   Print a table of all thumbnails and exit.
 429     match_filter:      A function that gets called with the info_dict of
 430                        every video.
 431                        If it returns a message, the video is ignored.
 432                        If it returns None, the video is downloaded.
 433                        match_filter_func in utils.py is one example for this.
 434     no_color:          Do not emit color codes in output.
 435     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 436                        HTTP header
 437     geo_bypass_country:
 438                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 439                        explicit geographic restriction bypassing via faking
 440                        X-Forwarded-For HTTP header
 441     geo_bypass_ip_block:
 442                        IP range in CIDR notation that will be used similarly to
 443                        geo_bypass_country
 444
 445     The following options determine which downloader is picked:
 446     external_downloader: A dictionary of protocol keys and the executable of the
 447                        external downloader to use for it. The allowed protocols
 448                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 449                        Set the value to 'native' to use the native downloader
 450     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 451                        or {'m3u8': 'ffmpeg'} instead.
 452                        Use the native HLS downloader instead of ffmpeg/avconv
 453                        if True, otherwise use ffmpeg/avconv if False, otherwise
 454                        use downloader suggested by extractor if None.
 455     compat_opts:       Compatibility options. See "Differences in default behavior".
 456                        The following options do not work when used through the API:
 457                        filename, abort-on-error, multistreams, no-live-chat, format-sort
 458                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 459                        Refer to __init__.py for their implementation
 460     progress_template: Dictionary of templates for progress outputs.
 461                        Allowed keys are 'download', 'postprocess',
 462                        'download-title' (console title) and 'postprocess-title'.
 463                        The template is mapped on a dictionary with keys 'progress' and 'info'
 464
 465     The following parameters are not used by YoutubeDL itself, they are used by
 466     the downloader (see yt_dlp/downloader/common.py):
 467     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 468     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 469     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 470     external_downloader_args, concurrent_fragment_downloads.
 471
 472     The following options are used by the post processors:
 473     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 474                        otherwise prefer ffmpeg. (avconv support is deprecated)
 475     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 476                        to the binary or its containing directory.
 477     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 478                        and a list of additional command-line arguments for the
 479                        postprocessor/executable. The dict can also have "PP+EXE" keys
 480                        which are used when the given exe is used by the given PP.
 481                        Use 'default' as the name for arguments to be passed to all PP
 482                        For compatibility with youtube-dl, a single list of args
 483                        can also be used
 484
 485     The following options are used by the extractors:
 486     extractor_retries: Number of times to retry for known errors
 487     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 488     hls_split_discontinuity: Split HLS playlists to different formats at
 489                        discontinuities such as ad breaks (default: False)
 490     extractor_args:    A dictionary of arguments to be passed to the extractors.
 491                        See "EXTRACTOR ARGUMENTS" for details.
 492                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 493     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 494     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 495                        If True (default), DASH manifests and related
 496                        data will be downloaded and processed by extractor.
 497                        You can reduce network I/O by disabling it if you don't
 498                        care about DASH. (only for youtube)
 499     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 500                        If True (default), HLS manifests and related
 501                        data will be downloaded and processed by extractor.
 502                        You can reduce network I/O by disabling it if you don't
 503                        care about HLS. (only for youtube)
 504     """
 505
 506     _NUMERIC_FIELDS = set((  # class-level set of field-name strings, built from a single tuple literal
 507         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 508         'timestamp', 'release_timestamp',
 509         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 510         'average_rating', 'comment_count', 'age_limit',
 511         'start_time', 'end_time',
 512         'chapter_number', 'season_number', 'episode_number',
 513         'track_number', 'disc_number', 'release_year',
 514     ))  # NOTE(review): presumably the info_dict keys whose values are numbers; the code that reads this set is not in view - confirm
 515
 516     _format_selection_exts = {  # maps a category name to the set of file extensions listed for it
 517         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 518         'video': {'mp4', 'flv', 'webm', '3gp'},
 519         'storyboards': {'mhtml'},
 520     }  # NOTE(review): presumably consulted when a format selector names a bare extension; the consumer is not in view - confirm
 521
 522     def __init__(self, params=None, auto_init=True):
 523         """Create a YoutubeDL object with the given options.
 524         @param auto_init    Whether to load the default extractors and print header (if verbose).
 525                             Set to 'no_verbose_header' to not print the header
 526         """
 527         if params is None:
 528             params = {}
 529         self.params = params
 530         self._ies = {}
 531         self._ies_instances = {}
 532         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 533         self._printed_messages = set()
 534         self._first_webpage_request = True
 535         self._post_hooks = []
 536         self._progress_hooks = []
 537         self._postprocessor_hooks = []
 538         self._download_retcode = 0
 539         self._num_downloads = 0
 540         self._num_videos = 0
 541         self._playlist_level = 0
 542         self._playlist_urls = set()
 543         self.cache = Cache(self)
 544
 545         windows_enable_vt_mode()
 546         self._out_files = {
 547             'error': sys.stderr,
 548             'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
 549             'console': None if compat_os_name == 'nt' else next(
 550                 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
 551         }
 552         self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
 553         self._allow_colors = {
 554             type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
 555             for type_ in ('screen', 'error')
 556         }
 557
 558         if sys.version_info < (3, 6):
 559             self.report_warning(
 560                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 561
 562         if self.params.get('allow_unplayable_formats'):
 563             self.report_warning(
 564                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 565                 'This is a developer option intended for debugging. \n'
 566                 '         If you experience any issues while using this option, '
 567                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 568
 569         def check_deprecated(param, option, suggestion):
 570             if self.params.get(param) is not None:
 571                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 572                 return True
 573             return False
 574
 575         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 576             if self.params.get('geo_verification_proxy') is None:
 577                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 578
 579         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 580         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 581         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 582
 583         for msg in self.params.get('_warnings', []):
 584             self.report_warning(msg)
 585         for msg in self.params.get('_deprecation_warnings', []):
 586             self.deprecation_warning(msg)
 587
 588         if 'list-formats' in self.params.get('compat_opts', []):
 589             self.params['listformats_table'] = False
 590
 591         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 592             # nooverwrites was unnecessarily changed to overwrites
 593             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 594             # This ensures compatibility with both keys
 595             self.params['overwrites'] = not self.params['nooverwrites']
 596         elif self.params.get('overwrites') is None:
 597             self.params.pop('overwrites', None)
 598         else:
 599             self.params['nooverwrites'] = not self.params['overwrites']
 600
 601         self.params.setdefault('forceprint', {})
 602         self.params.setdefault('print_to_file', {})
 603
 604         # Compatibility with older syntax
 605         if not isinstance(params['forceprint'], dict):
 606             self.params['forceprint'] = {'video': params['forceprint']}
 607
 608         if self.params.get('bidi_workaround', False):
 609             try:
 610                 import pty
 611                 master, slave = pty.openpty()
 612                 width = compat_get_terminal_size().columns
 613                 if width is None:
 614                     width_args = []
 615                 else:
 616                     width_args = ['-w', str(width)]
 617                 sp_kwargs = dict(
 618                     stdin=subprocess.PIPE,
 619                     stdout=slave,
 620                     stderr=self._out_files['error'])
 621                 try:
 622                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 623                 except OSError:
 624                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 625                 self._output_channel = os.fdopen(master, 'rb')
 626             except OSError as ose:
 627                 if ose.errno == errno.ENOENT:
 628                     self.report_warning(
 629                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 630                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 631                 else:
 632                     raise
 633
 634         if (sys.platform != 'win32'
 635                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 636                 and not self.params.get('restrictfilenames', False)):
 637             # Unicode filesystem API will throw errors (#1474, #13027)
 638             self.report_warning(
 639                 'Assuming --restrict-filenames since file system encoding '
 640                 'cannot encode all characters. '
 641                 'Set the LC_ALL environment variable to fix this.')
 642             self.params['restrictfilenames'] = True
 643
 644         self.outtmpl_dict = self.parse_outtmpl()
 645
 646         # Creating format selector here allows us to catch syntax errors before the extraction
 647         self.format_selector = (
 648             self.params.get('format') if self.params.get('format') in (None, '-')
 649             else self.params['format'] if callable(self.params['format'])
 650             else self.build_format_selector(self.params['format']))
 651
 652         # Set http_headers defaults according to std_headers
 653         self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
 654
 655         self._setup_opener()
 656
 657         if auto_init:
 658             if auto_init != 'no_verbose_header':
 659                 self.print_debug_header()
 660             self.add_default_info_extractors()
 661
 662         hooks = {
 663             'post_hooks': self.add_post_hook,
 664             'progress_hooks': self.add_progress_hook,
 665             'postprocessor_hooks': self.add_postprocessor_hook,
 666         }
 667         for opt, fn in hooks.items():
 668             for ph in self.params.get(opt, []):
 669                 fn(ph)
 670
 671         for pp_def_raw in self.params.get('postprocessors', []):
 672             pp_def = dict(pp_def_raw)
 673             when = pp_def.pop('when', 'post_process')
 674             self.add_post_processor(
 675                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 676                 when=when)
 677
 678         register_socks_protocols()
 679
 680         def preload_download_archive(fn):
 681             """Preload the archive, if any is specified"""
 682             if fn is None:
 683                 return False
 684             self.write_debug(f'Loading archive file {fn!r}')
 685             try:
 686                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 687                     for line in archive_file:
 688                         self.archive.add(line.strip())
 689             except IOError as ioe:
 690                 if ioe.errno != errno.ENOENT:
 691                     raise
 692                 return False
 693             return True
 694
 695         self.archive = set()
 696         preload_download_archive(self.params.get('download_archive'))
 697
 698     def warn_if_short_id(self, argv):
 699         # short YouTube ID starting with dash?
 700         idxs = [
 701             i for i, a in enumerate(argv)
 702             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 703         if idxs:
 704             correct_argv = (
 705                 ['yt-dlp']
 706                 + [a for i, a in enumerate(argv) if i not in idxs]
 707                 + ['--'] + [argv[i] for i in idxs]
 708             )
 709             self.report_warning(
 710                 'Long argument string detected. '
 711                 'Use -- to separate parameters and URLs, like this:\n%s' %
 712                 args_to_str(correct_argv))
 713
 714     def add_info_extractor(self, ie):
 715         """Add an InfoExtractor object to the end of the list."""
 716         ie_key = ie.ie_key()
 717         self._ies[ie_key] = ie
 718         if not isinstance(ie, type):
 719             self._ies_instances[ie_key] = ie
 720             ie.set_downloader(self)
 721
 722     def _get_info_extractor_class(self, ie_key):
 723         ie = self._ies.get(ie_key)
 724         if ie is None:
 725             ie = get_info_extractor(ie_key)
 726             self.add_info_extractor(ie)
 727         return ie
 728
 729     def get_info_extractor(self, ie_key):
 730         """
 731         Get an instance of an IE with name ie_key, it will try to get one from
 732         the _ies list, if there's no instance it will create a new one and add
 733         it to the extractor list.
 734         """
 735         ie = self._ies_instances.get(ie_key)
 736         if ie is None:
 737             ie = get_info_extractor(ie_key)()
 738             self.add_info_extractor(ie)
 739         return ie
 740
 741     def add_default_info_extractors(self):
 742         """
 743         Add the InfoExtractors returned by gen_extractors to the end of the list
 744         """
 745         for ie in gen_extractor_classes():
 746             self.add_info_extractor(ie)
 747
 748     def add_post_processor(self, pp, when='post_process'):
 749         """Add a PostProcessor object to the end of the chain."""
 750         self._pps[when].append(pp)
 751         pp.set_downloader(self)
 752
 753     def add_post_hook(self, ph):
 754         """Add the post hook"""
 755         self._post_hooks.append(ph)
 756
 757     def add_progress_hook(self, ph):
 758         """Add the download progress hook"""
 759         self._progress_hooks.append(ph)
 760
 761     def add_postprocessor_hook(self, ph):
 762         """Add the postprocessing progress hook"""
 763         self._postprocessor_hooks.append(ph)
 764         for pps in self._pps.values():
 765             for pp in pps:
 766                 pp.add_progress_hook(ph)
 767
 768     def _bidi_workaround(self, message):
 769         if not hasattr(self, '_output_channel'):
 770             return message
 771
 772         assert hasattr(self, '_output_process')
 773         assert isinstance(message, compat_str)
 774         line_count = message.count('\n') + 1
 775         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 776         self._output_process.stdin.flush()
 777         res = ''.join(self._output_channel.readline().decode('utf-8')
 778                       for _ in range(line_count))
 779         return res[:-len('\n')]
 780
 781     def _write_string(self, message, out=None, only_once=False):
 782         if only_once:
 783             if message in self._printed_messages:
 784                 return
 785             self._printed_messages.add(message)
 786         write_string(message, out=out, encoding=self.params.get('encoding'))
 787
 788     def to_stdout(self, message, skip_eol=False, quiet=None):
 789         """Print message to stdout"""
 790         if quiet is not None:
 791             self.deprecation_warning('"ydl.to_stdout" no longer accepts the argument quiet. Use "ydl.to_screen" instead')
 792         self._write_string(
 793             '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 794             self._out_files['print'])
 795
 796     def to_screen(self, message, skip_eol=False, quiet=None):
 797         """Print message to screen if not in quiet mode"""
 798         if self.params.get('logger'):
 799             self.params['logger'].debug(message)
 800             return
 801         if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
 802             return
 803         self._write_string(
 804             '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 805             self._out_files['screen'])
 806
 807     def to_stderr(self, message, only_once=False):
 808         """Print message to stderr"""
 809         assert isinstance(message, compat_str)
 810         if self.params.get('logger'):
 811             self.params['logger'].error(message)
 812         else:
 813             self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)
 814
 815     def _send_console_code(self, code):
 816         if compat_os_name == 'nt' or not self._out_files['console']:
 817             return
 818         self._write_string(code, self._out_files['console'])
 819
 820     def to_console_title(self, message):
 821         if not self.params.get('consoletitle', False):
 822             return
 823         message = remove_terminal_sequences(message)
 824         if compat_os_name == 'nt':
 825             if ctypes.windll.kernel32.GetConsoleWindow():
 826                 # c_wchar_p() might not be necessary if `message` is
 827                 # already of type unicode()
 828                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 829         else:
 830             self._send_console_code(f'\033]0;{message}\007')
 831
 832     def save_console_title(self):
 833         if not self.params.get('consoletitle') or self.params.get('simulate'):
 834             return
 835         self._send_console_code('\033[22;0t')  # Save the title on stack
 836
 837     def restore_console_title(self):
 838         if not self.params.get('consoletitle') or self.params.get('simulate'):
 839             return
 840         self._send_console_code('\033[23;0t')  # Restore the title from stack
 841
 842     def __enter__(self):
 843         self.save_console_title()
 844         return self
 845
 846     def __exit__(self, *args):
 847         self.restore_console_title()
 848
 849         if self.params.get('cookiefile') is not None:
 850             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 851
 852     def trouble(self, message=None, tb=None, is_error=True):
 853         """Determine action to take when a download problem appears.
 854
 855         Depending on if the downloader has been configured to ignore
 856         download errors or not, this method may throw an exception or
 857         not when errors are found, after printing the message.
 858
 859         @param tb          If given, is additional traceback information
 860         @param is_error    Whether to raise error according to ignorerrors
 861         """
 862         if message is not None:
 863             self.to_stderr(message)
 864         if self.params.get('verbose'):
 865             if tb is None:
 866                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 867                     tb = ''
 868                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 869                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 870                     tb += encode_compat_str(traceback.format_exc())
 871                 else:
 872                     tb_data = traceback.format_list(traceback.extract_stack())
 873                     tb = ''.join(tb_data)
 874             if tb:
 875                 self.to_stderr(tb)
 876         if not is_error:
 877             return
 878         if not self.params.get('ignoreerrors'):
 879             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 880                 exc_info = sys.exc_info()[1].exc_info
 881             else:
 882                 exc_info = sys.exc_info()
 883             raise DownloadError(message, exc_info)
 884         self._download_retcode = 1
 885
    class Styles(Enum):
        # Named text styles. _format_text() unwraps a member to its string
        # value before handing it to format_text().
        # NOTE: HEADERS and WARNING share the value 'yellow'; by Enum
        # semantics the later name (WARNING) is an alias of HEADERS, i.e.
        # both names refer to the same member.
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 894
 895     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 896         if test_encoding:
 897             original_text = text
 898             # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
 899             encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
 900             text = text.encode(encoding, 'ignore').decode(encoding)
 901             if fallback is not None and text != original_text:
 902                 text = fallback
 903         if isinstance(f, self.Styles):
 904             f = f.value
 905         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 906
 907     def _format_screen(self, *args, **kwargs):
 908         return self._format_text(
 909             self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)
 910
 911     def _format_err(self, *args, **kwargs):
 912         return self._format_text(
 913             self._out_files['error'], self._allow_colors['error'], *args, **kwargs)
 914
 915     def report_warning(self, message, only_once=False):
 916         '''
 917         Print the message to stderr, it will be prefixed with 'WARNING:'
 918         If stderr is a tty file the 'WARNING:' will be colored
 919         '''
 920         if self.params.get('logger') is not None:
 921             self.params['logger'].warning(message)
 922         else:
 923             if self.params.get('no_warnings'):
 924                 return
 925             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 926
 927     def deprecation_warning(self, message):
 928         if self.params.get('logger') is not None:
 929             self.params['logger'].warning('DeprecationWarning: {message}')
 930         else:
 931             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 932
 933     def report_error(self, message, *args, **kwargs):
 934         '''
 935         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 936         in red if stderr is a tty file.
 937         '''
 938         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 939
 940     def write_debug(self, message, only_once=False):
 941         '''Log debug message or Print message to stderr'''
 942         if not self.params.get('verbose', False):
 943             return
 944         message = '[debug] %s' % message
 945         if self.params.get('logger'):
 946             self.params['logger'].debug(message)
 947         else:
 948             self.to_stderr(message, only_once)
 949
 950     def report_file_already_downloaded(self, file_name):
 951         """Report file has already been fully downloaded."""
 952         try:
 953             self.to_screen('[download] %s has already been downloaded' % file_name)
 954         except UnicodeEncodeError:
 955             self.to_screen('[download] The file has already been downloaded')
 956
 957     def report_file_delete(self, file_name):
 958         """Report that existing file will be deleted."""
 959         try:
 960             self.to_screen('Deleting existing file %s' % file_name)
 961         except UnicodeEncodeError:
 962             self.to_screen('Deleting existing file')
 963
 964     def raise_no_formats(self, info, forced=False, *, msg=None):
 965         has_drm = info.get('__has_drm')
 966         ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
 967         msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
 968         if forced or not ignored:
 969             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 970                                  expected=has_drm or ignored or expected)
 971         else:
 972             self.report_warning(msg)
 973
 974     def parse_outtmpl(self):
 975         outtmpl_dict = self.params.get('outtmpl', {})
 976         if not isinstance(outtmpl_dict, dict):
 977             outtmpl_dict = {'default': outtmpl_dict}
 978         # Remove spaces in the default template
 979         if self.params.get('restrictfilenames'):
 980             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 981         else:
 982             sanitize = lambda x: x
 983         outtmpl_dict.update({
 984             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 985             if outtmpl_dict.get(k) is None})
 986         for key, val in outtmpl_dict.items():
 987             if isinstance(val, bytes):
 988                 self.report_warning(
 989                     'Parameter outtmpl is bytes, but should be a unicode string. '
 990                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 991         return outtmpl_dict
 992
 993     def get_output_path(self, dir_type='', filename=None):
 994         paths = self.params.get('paths', {})
 995         assert isinstance(paths, dict)
 996         path = os.path.join(
 997             expand_path(paths.get('home', '').strip()),
 998             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 999             filename or '')
1000
1001         # Temporary fix for #4787
1002         # 'Treat' all problem characters by passing filename through preferredencoding
1003         # to workaround encoding issues with subprocess on python2 @ Windows
1004         if sys.version_info < (3, 0) and sys.platform == 'win32':
1005             path = encodeFilename(path, True).decode(preferredencoding())
1006         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1007
1008     @staticmethod
1009     def _outtmpl_expandpath(outtmpl):
1010         # expand_path translates '%%' into '%' and '$$' into '$'
1011         # correspondingly that is not what we want since we need to keep
1012         # '%%' intact for template dict substitution step. Working around
1013         # with boundary-alike separator hack.
1014         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1015         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
1016
1017         # outtmpl should be expand_path'ed before template dict substitution
1018         # because meta fields may contain env variables we don't want to
1019         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1020         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1021         return expand_path(outtmpl).replace(sep, '')
1022
1023     @staticmethod
1024     def escape_outtmpl(outtmpl):
1025         ''' Escape any remaining strings like %s, %abc% etc. '''
1026         return re.sub(
1027             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1028             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1029             outtmpl)
1030
1031     @classmethod
1032     def validate_outtmpl(cls, outtmpl):
1033         ''' @return None or Exception object '''
1034         outtmpl = re.sub(
1035             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1036             lambda mobj: f'{mobj.group(0)[:-1]}s',
1037             cls._outtmpl_expandpath(outtmpl))
1038         try:
1039             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1040             return None
1041         except ValueError as err:
1042             return err
1043
1044     @staticmethod
1045     def _copy_infodict(info_dict):
1046         info_dict = dict(info_dict)
1047         info_dict.pop('__postprocessors', None)
1048         return info_dict
1049
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        @return            A tuple (outtmpl, TMPL_DICT): the template with every keyed field
                           renamed to an internal key, and the dict mapping those internal
                           keys to their already computed values
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        # Everything below works on a shallow copy; 'epoch' above is the only
        # key written to the caller's dict
        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Filled by create_key(): internal key -> computed value
        TMPL_DICT = {}
        # Matches the template fields, including the custom conversion types ljqBUDS
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the numeric alternative below is unescaped, so it
        # matches any character and not only a decimal point - confirm whether r'\.' was intended
        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Grammar of the text inside %(...): optional negation, field path, maths,
        # strftime format, alternate field(s), replacement and default
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Resolve a dotted field path against info_dict; a leading '.'
            # (empty first component) is dropped
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Compute the value of one parsed field expression (a groupdict of INTERNAL_FORMAT_RE)
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                # NOTE: this local name shadows the imported `operator` module inside get_value
                operator = None
                # Consume the maths string as alternating operator / operand tokens
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is either a number or the name of another field
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        # e.g. one side is None: the whole expression has no value
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder used when a field has no value and no explicit default
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            # Fields named 'id' (or ending in '_id' / '.id', optionally followed by a sub-key) are flagged as IDs
            return sanitize_filename(str(value), restricted=restricted,
                                     is_id=re.search(r'(^|[_.])id(\.|$)', key))

        # `sanitize` may be a callable (used as the sanitizer) or a flag
        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            # json.dumps fallback: sets and LazyLists become lists, anything else its repr
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            # re.sub callback: compute the value of one template field, store it in
            # TMPL_DICT and return the field rewritten to reference that entry
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            # Try the field, then each alternate in turn, until one yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            # No value -> default; otherwise the replacement (if given) wins over the value
            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            # Same format spec, but with the conversion type replaced by 's'
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                # Apply the width/precision on the UTF-8 bytes rather than on characters
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            # Internal key: the field expression (with a NUL inserted after every '%')
            # and the original format, separated by a NUL
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1223
1224     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1225         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1226         return self.escape_outtmpl(outtmpl) % info_dict
1227
1228     def _prepare_filename(self, info_dict, tmpl_type='default'):
1229         try:
1230             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1231             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1232             if not filename:
1233                 return None
1234
1235             if tmpl_type in ('default', 'temp'):
1236                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1237                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1238                     filename = replace_extension(filename, ext, final_ext)
1239             else:
1240                 force_ext = OUTTMPL_TYPES[tmpl_type]
1241                 if force_ext:
1242                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1243
1244             # https://github.com/blackjack4494/youtube-dlc/issues/85
1245             trim_file_name = self.params.get('trim_file_name', False)
1246             if trim_file_name:
1247                 no_ext, *ext = filename.rsplit('.', 2)
1248                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1249
1250             return filename
1251         except ValueError as err:
1252             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1253             return None
1254
1255     def prepare_filename(self, info_dict, dir_type='', warn=False):
1256         """Generate the output filename."""
1257
1258         filename = self._prepare_filename(info_dict, dir_type or 'default')
1259         if not filename and dir_type not in ('', 'temp'):
1260             return ''
1261
1262         if warn:
1263             if not self.params.get('paths'):
1264                 pass
1265             elif filename == '-':
1266                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1267             elif os.path.isabs(filename):
1268                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1269         if filename == '-' or not filename:
1270             return filename
1271
1272         return self.get_output_path(dir_type, filename)
1273
1274     def _match_entry(self, info_dict, incomplete=False, silent=False):
1275         """ Returns None if the file should be downloaded """
1276
1277         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1278
1279         def check_filter():
1280             if 'title' in info_dict:
1281                 # This can happen when we're just evaluating the playlist
1282                 title = info_dict['title']
1283                 matchtitle = self.params.get('matchtitle', False)
1284                 if matchtitle:
1285                     if not re.search(matchtitle, title, re.IGNORECASE):
1286                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1287                 rejecttitle = self.params.get('rejecttitle', False)
1288                 if rejecttitle:
1289                     if re.search(rejecttitle, title, re.IGNORECASE):
1290                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1291             date = info_dict.get('upload_date')
1292             if date is not None:
1293                 dateRange = self.params.get('daterange', DateRange())
1294                 if date not in dateRange:
1295                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1296             view_count = info_dict.get('view_count')
1297             if view_count is not None:
1298                 min_views = self.params.get('min_views')
1299                 if min_views is not None and view_count < min_views:
1300                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1301                 max_views = self.params.get('max_views')
1302                 if max_views is not None and view_count > max_views:
1303                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1304             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1305                 return 'Skipping "%s" because it is age restricted' % video_title
1306
1307             match_filter = self.params.get('match_filter')
1308             if match_filter is not None:
1309                 try:
1310                     ret = match_filter(info_dict, incomplete=incomplete)
1311                 except TypeError:
1312                     # For backward compatibility
1313                     ret = None if incomplete else match_filter(info_dict)
1314                 if ret is not None:
1315                     return ret
1316             return None
1317
1318         if self.in_download_archive(info_dict):
1319             reason = '%s has already been recorded in the archive' % video_title
1320             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1321         else:
1322             reason = check_filter()
1323             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1324         if reason is not None:
1325             if not silent:
1326                 self.to_screen('[download] ' + reason)
1327             if self.params.get(break_opt, False):
1328                 raise break_err()
1329         return reason
1330
1331     @staticmethod
1332     def add_extra_info(info_dict, extra_info):
1333         '''Set the keys from extra_info in info dict if they are missing'''
1334         for key, value in extra_info.items():
1335             info_dict.setdefault(key, value)
1336
1337     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1338                      process=True, force_generic_extractor=False):
1339         """
1340         Return a list with a dictionary for each video extracted.
1341
1342         Arguments:
1343         url -- URL to extract
1344
1345         Keyword arguments:
1346         download -- whether to download videos during extraction
1347         ie_key -- extractor key hint
1348         extra_info -- dictionary containing the extra values to add to each result
1349         process -- whether to resolve all unresolved references (URLs, playlist items),
1350             must be True for download to work.
1351         force_generic_extractor -- force using the generic extractor
1352         """
1353
1354         if extra_info is None:
1355             extra_info = {}
1356
1357         if not ie_key and force_generic_extractor:
1358             ie_key = 'Generic'
1359
1360         if ie_key:
1361             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1362         else:
1363             ies = self._ies
1364
1365         for ie_key, ie in ies.items():
1366             if not ie.suitable(url):
1367                 continue
1368
1369             if not ie.working():
1370                 self.report_warning('The program functionality for this site has been marked as broken, '
1371                                     'and will probably not work.')
1372
1373             temp_id = ie.get_temp_id(url)
1374             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1375                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1376                 if self.params.get('break_on_existing', False):
1377                     raise ExistingVideoReached()
1378                 break
1379             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1380         else:
1381             self.report_error('no suitable InfoExtractor for URL %s' % url)
1382
1383     def __handle_extraction_exceptions(func):
1384         @functools.wraps(func)
1385         def wrapper(self, *args, **kwargs):
1386             while True:
1387                 try:
1388                     return func(self, *args, **kwargs)
1389                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1390                     raise
1391                 except ReExtractInfo as e:
1392                     if e.expected:
1393                         self.to_screen(f'{e}; Re-extracting data')
1394                     else:
1395                         self.to_stderr('\r')
1396                         self.report_warning(f'{e}; Re-extracting data')
1397                     continue
1398                 except GeoRestrictedError as e:
1399                     msg = e.msg
1400                     if e.countries:
1401                         msg += '\nThis video is available in %s.' % ', '.join(
1402                             map(ISO3166Utils.short2full, e.countries))
1403                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1404                     self.report_error(msg)
1405                 except ExtractorError as e:  # An error we somewhat expected
1406                     self.report_error(str(e), e.format_traceback())
1407                 except Exception as e:
1408                     if self.params.get('ignoreerrors'):
1409                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1410                     else:
1411                         raise
1412                 break
1413         return wrapper
1414
1415     def _wait_for_video(self, ie_result):
1416         if (not self.params.get('wait_for_video')
1417                 or ie_result.get('_type', 'video') != 'video'
1418                 or ie_result.get('formats') or ie_result.get('url')):
1419             return
1420
1421         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1422         last_msg = ''
1423
1424         def progress(msg):
1425             nonlocal last_msg
1426             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1427             last_msg = msg
1428
1429         min_wait, max_wait = self.params.get('wait_for_video')
1430         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1431         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1432             diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1433             self.report_warning('Release time of video is not known')
1434         elif (diff or 0) <= 0:
1435             self.report_warning('Video should already be available according to extracted info')
1436         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1437         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1438
1439         wait_till = time.time() + diff
1440         try:
1441             while True:
1442                 diff = wait_till - time.time()
1443                 if diff <= 0:
1444                     progress('')
1445                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1446                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1447                 time.sleep(1)
1448         except KeyboardInterrupt:
1449             progress('')
1450             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1451         except BaseException as e:
1452             if not isinstance(e, ReExtractInfo):
1453                 self.to_screen('')
1454             raise
1455
1456     @__handle_extraction_exceptions
1457     def __extract_info(self, url, ie, download, extra_info, process):
1458         ie_result = ie.extract(url)
1459         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1460             return
1461         if isinstance(ie_result, list):
1462             # Backwards compatibility: old IE result format
1463             ie_result = {
1464                 '_type': 'compat_list',
1465                 'entries': ie_result,
1466             }
1467         if extra_info.get('original_url'):
1468             ie_result.setdefault('original_url', extra_info['original_url'])
1469         self.add_default_extra_info(ie_result, ie, url)
1470         if process:
1471             self._wait_for_video(ie_result)
1472             return self.process_ie_result(ie_result, download, extra_info)
1473         else:
1474             return ie_result
1475
1476     def add_default_extra_info(self, ie_result, ie, url):
1477         if url is not None:
1478             self.add_extra_info(ie_result, {
1479                 'webpage_url': url,
1480                 'original_url': url,
1481             })
1482         webpage_url = ie_result.get('webpage_url')
1483         if webpage_url:
1484             self.add_extra_info(ie_result, {
1485                 'webpage_url_basename': url_basename(webpage_url),
1486                 'webpage_url_domain': get_domain(webpage_url),
1487             })
1488         if ie is not None:
1489             self.add_extra_info(ie_result, {
1490                 'extractor': ie.IE_NAME,
1491                 'extractor_key': ie.ie_key(),
1492             })
1493
1494     def process_ie_result(self, ie_result, download=True, extra_info=None):
1495         """
1496         Take the result of the ie(may be modified) and resolve all unresolved
1497         references (URLs, playlist items).
1498
1499         It will also download the videos if 'download'.
1500         Returns the resolved ie_result.
1501         """
1502         if extra_info is None:
1503             extra_info = {}
1504         result_type = ie_result.get('_type', 'video')
1505
1506         if result_type in ('url', 'url_transparent'):
1507             ie_result['url'] = sanitize_url(ie_result['url'])
1508             if ie_result.get('original_url'):
1509                 extra_info.setdefault('original_url', ie_result['original_url'])
1510
1511             extract_flat = self.params.get('extract_flat', False)
1512             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1513                     or extract_flat is True):
1514                 info_copy = ie_result.copy()
1515                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1516                 if ie and not ie_result.get('id'):
1517                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1518                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1519                 self.add_extra_info(info_copy, extra_info)
1520                 info_copy, _ = self.pre_process(info_copy)
1521                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1522                 if self.params.get('force_write_download_archive', False):
1523                     self.record_download_archive(info_copy)
1524                 return ie_result
1525
1526         if result_type == 'video':
1527             self.add_extra_info(ie_result, extra_info)
1528             ie_result = self.process_video_result(ie_result, download=download)
1529             additional_urls = (ie_result or {}).get('additional_urls')
1530             if additional_urls:
1531                 # TODO: Improve MetadataParserPP to allow setting a list
1532                 if isinstance(additional_urls, compat_str):
1533                     additional_urls = [additional_urls]
1534                 self.to_screen(
1535                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1536                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1537                 ie_result['additional_entries'] = [
1538                     self.extract_info(
1539                         url, download, extra_info=extra_info,
1540                         force_generic_extractor=self.params.get('force_generic_extractor'))
1541                     for url in additional_urls
1542                 ]
1543             return ie_result
1544         elif result_type == 'url':
1545             # We have to add extra_info to the results because it may be
1546             # contained in a playlist
1547             return self.extract_info(
1548                 ie_result['url'], download,
1549                 ie_key=ie_result.get('ie_key'),
1550                 extra_info=extra_info)
1551         elif result_type == 'url_transparent':
1552             # Use the information from the embedding page
1553             info = self.extract_info(
1554                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1555                 extra_info=extra_info, download=False, process=False)
1556
1557             # extract_info may return None when ignoreerrors is enabled and
1558             # extraction failed with an error, don't crash and return early
1559             # in this case
1560             if not info:
1561                 return info
1562
1563             force_properties = dict(
1564                 (k, v) for k, v in ie_result.items() if v is not None)
1565             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1566                 if f in force_properties:
1567                     del force_properties[f]
1568             new_result = info.copy()
1569             new_result.update(force_properties)
1570
1571             # Extracted info may not be a video result (i.e.
1572             # info.get('_type', 'video') != video) but rather an url or
1573             # url_transparent. In such cases outer metadata (from ie_result)
1574             # should be propagated to inner one (info). For this to happen
1575             # _type of info should be overridden with url_transparent. This
1576             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1577             if new_result.get('_type') == 'url':
1578                 new_result['_type'] = 'url_transparent'
1579
1580             return self.process_ie_result(
1581                 new_result, download=download, extra_info=extra_info)
1582         elif result_type in ('playlist', 'multi_video'):
1583             # Protect from infinite recursion due to recursively nested playlists
1584             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1585             webpage_url = ie_result['webpage_url']
1586             if webpage_url in self._playlist_urls:
1587                 self.to_screen(
1588                     '[download] Skipping already downloaded playlist: %s'
1589                     % ie_result.get('title') or ie_result.get('id'))
1590                 return
1591
1592             self._playlist_level += 1
1593             self._playlist_urls.add(webpage_url)
1594             self._fill_common_fields(ie_result, False)
1595             self._sanitize_thumbnails(ie_result)
1596             try:
1597                 return self.__process_playlist(ie_result, download)
1598             finally:
1599                 self._playlist_level -= 1
1600                 if not self._playlist_level:
1601                     self._playlist_urls.clear()
1602         elif result_type == 'compat_list':
1603             self.report_warning(
1604                 'Extractor %s returned a compat_list result. '
1605                 'It needs to be updated.' % ie_result.get('extractor'))
1606
1607             def _fixup(r):
1608                 self.add_extra_info(r, {
1609                     'extractor': ie_result['extractor'],
1610                     'webpage_url': ie_result['webpage_url'],
1611                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1612                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1613                     'extractor_key': ie_result['extractor_key'],
1614                 })
1615                 return r
1616             ie_result['entries'] = [
1617                 self.process_ie_result(_fixup(r), download, extra_info)
1618                 for r in ie_result['entries']
1619             ]
1620             return ie_result
1621         else:
1622             raise Exception('Invalid result type: %s' % result_type)
1623
1624     def _ensure_dir_exists(self, path):
1625         return make_dir(path, self.report_error)
1626
1627     @staticmethod
1628     def _playlist_infodict(ie_result, **kwargs):
1629         return {
1630             **ie_result,
1631             'playlist': ie_result.get('title') or ie_result.get('id'),
1632             'playlist_id': ie_result.get('id'),
1633             'playlist_title': ie_result.get('title'),
1634             'playlist_uploader': ie_result.get('uploader'),
1635             'playlist_uploader_id': ie_result.get('uploader_id'),
1636             'playlist_index': 0,
1637             **kwargs,
1638         }
1639
    def __process_playlist(self, ie_result, download):
        """Select, filter and process the entries of a playlist result.

        The entries are chosen according to the 'playlist_items' param or,
        failing that, 'playliststart'/'playlistend'. Playlist-level files are
        written unless 'simulate' is set or 'allow_playlist_files' is false.
        Each selected entry is then processed and the results replace
        ie_result['entries'].

        Returns the value of run_all_pps('playlist', ie_result), or None when
        writing the playlist info json or description returns None.
        Raises EntryNotInPlaylist when ie_result has no 'entries' or when an
        entry of a partially extracted playlist ('requested_entries' set)
        cannot be found.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Sentinel marking positions for which no entry was supplied
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            def fill_missing_entries(entries, indices):
                # Place each entry at its 1-based index; gaps keep the sentinel
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                # Expand a spec such as "1,3-5" into 1, 3, 4, 5
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            playlist_count = len(ie_entries)
            # The remaining %d is filled in later with the number of selected entries
            msg = f'Collected {playlist_count} videos; downloading %d of them'
            ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count

            def get_entry(i):
                return ie_entries[i - 1]
        else:
            msg = 'Downloading %d videos'
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)
            elif isinstance(ie_entries, InAdvancePagedList):
                if ie_entries._pagesize == 1:
                    # NOTE(review): this local is not read again on this code path
                    playlist_count = ie_entries._pagecount

            def get_entry(i):
                # Indexing a lazy container may trigger extraction, so it goes
                # through the same exception handling as regular extraction
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        # `broken` records that a break_on_* exception stopped the collection early
        entries, broken = [], False
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    break
            # A None placeholder is appended for unavailable items so that
            # positions stay aligned with `playlistitems` below
            entries.append(entry)
            try:
                if entry is not None:
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                broken = True
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
            ie_result['playlist_count'] = n_entries

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            # ie_copy is only bound on this path; its later use is guarded by
            # _infojson_written, which can only become truthy here
            ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        # No limit on failures unless 'skip_playlist_after_errors' is set
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            if 'playlist-index' in self.params.get('compat_opts', []):
                # Compat mode: derive the index from the position after re-ordering
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_count': ie_result.get('playlist_count'),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Not silent this time, so the skip reason is shown to the user
            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {playlist}')
        return ie_result
1815
1816     @__handle_extraction_exceptions
1817     def __process_iterable_entry(self, entry, download, extra_info):
1818         return self.process_ie_result(
1819             entry, download=download, extra_info=extra_info)
1820
1821     def _build_format_filter(self, filter_spec):
1822         " Returns a function to filter the formats according to the filter_spec "
1823
1824         OPERATORS = {
1825             '<': operator.lt,
1826             '<=': operator.le,
1827             '>': operator.gt,
1828             '>=': operator.ge,
1829             '=': operator.eq,
1830             '!=': operator.ne,
1831         }
1832         operator_rex = re.compile(r'''(?x)\s*
1833             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1834             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1835             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1836             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1837         m = operator_rex.fullmatch(filter_spec)
1838         if m:
1839             try:
1840                 comparison_value = int(m.group('value'))
1841             except ValueError:
1842                 comparison_value = parse_filesize(m.group('value'))
1843                 if comparison_value is None:
1844                     comparison_value = parse_filesize(m.group('value') + 'B')
1845                 if comparison_value is None:
1846                     raise ValueError(
1847                         'Invalid value %r in format specification %r' % (
1848                             m.group('value'), filter_spec))
1849             op = OPERATORS[m.group('op')]
1850
1851         if not m:
1852             STR_OPERATORS = {
1853                 '=': operator.eq,
1854                 '^=': lambda attr, value: attr.startswith(value),
1855                 '$=': lambda attr, value: attr.endswith(value),
1856                 '*=': lambda attr, value: value in attr,
1857                 '~=': lambda attr, value: value.search(attr) is not None
1858             }
1859             str_operator_rex = re.compile(r'''(?x)\s*
1860                 (?P<key>[a-zA-Z0-9._-]+)\s*
1861                 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1862                 (?P<quote>["'])?
1863                 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1864                 (?(quote)(?P=quote))\s*
1865                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1866             m = str_operator_rex.fullmatch(filter_spec)
1867             if m:
1868                 if m.group('op') == '~=':
1869                     comparison_value = re.compile(m.group('value'))
1870                 else:
1871                     comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1872                 str_op = STR_OPERATORS[m.group('op')]
1873                 if m.group('negation'):
1874                     op = lambda attr, value: not str_op(attr, value)
1875                 else:
1876                     op = str_op
1877
1878         if not m:
1879             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1880
1881         def _filter(f):
1882             actual_value = f.get(m.group('key'))
1883             if actual_value is None:
1884                 return m.group('none_inclusive')
1885             return op(actual_value, comparison_value)
1886         return _filter
1887
1888     def _check_formats(self, formats):
1889         for f in formats:
1890             self.to_screen('[info] Testing format %s' % f['format_id'])
1891             path = self.get_output_path('temp')
1892             if not self._ensure_dir_exists(f'{path}/'):
1893                 continue
1894             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1895             temp_file.close()
1896             try:
1897                 success, _ = self.dl(temp_file.name, f, test=True)
1898             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1899                 success = False
1900             finally:
1901                 if os.path.exists(temp_file.name):
1902                     try:
1903                         os.remove(temp_file.name)
1904                     except OSError:
1905                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1906             if success:
1907                 yield f
1908             else:
1909                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1910
1911     def _default_format_spec(self, info_dict, download=True):
1912
1913         def can_merge():
1914             merger = FFmpegMergerPP(self)
1915             return merger.available and merger.can_merge()
1916
1917         prefer_best = (
1918             not self.params.get('simulate')
1919             and download
1920             and (
1921                 not can_merge()
1922                 or info_dict.get('is_live', False)
1923                 or self.outtmpl_dict['default'] == '-'))
1924         compat = (
1925             prefer_best
1926             or self.params.get('allow_multiple_audio_streams', False)
1927             or 'format-spec' in self.params.get('compat_opts', []))
1928
1929         return (
1930             'best/bestvideo+bestaudio' if prefer_best
1931             else 'bestvideo*+bestaudio/best' if not compat
1932             else 'bestvideo+bestaudio/best')
1933
1934     def build_format_selector(self, format_spec):
1935         def syntax_error(note, start):
1936             message = (
1937                 'Invalid format specification: '
1938                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1939             return SyntaxError(message)
1940
1941         PICKFIRST = 'PICKFIRST'
1942         MERGE = 'MERGE'
1943         SINGLE = 'SINGLE'
1944         GROUP = 'GROUP'
1945         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1946
1947         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1948                                   'video': self.params.get('allow_multiple_video_streams', False)}
1949
1950         check_formats = self.params.get('check_formats') == 'selected'
1951
1952         def _parse_filter(tokens):
1953             filter_parts = []
1954             for type, string, start, _, _ in tokens:
1955                 if type == tokenize.OP and string == ']':
1956                     return ''.join(filter_parts)
1957                 else:
1958                     filter_parts.append(string)
1959
1960         def _remove_unused_ops(tokens):
1961             # Remove operators that we don't use and join them with the surrounding strings
1962             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1963             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1964             last_string, last_start, last_end, last_line = None, None, None, None
1965             for type, string, start, end, line in tokens:
1966                 if type == tokenize.OP and string == '[':
1967                     if last_string:
1968                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1969                         last_string = None
1970                     yield type, string, start, end, line
1971                     # everything inside brackets will be handled by _parse_filter
1972                     for type, string, start, end, line in tokens:
1973                         yield type, string, start, end, line
1974                         if type == tokenize.OP and string == ']':
1975                             break
1976                 elif type == tokenize.OP and string in ALLOWED_OPS:
1977                     if last_string:
1978                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1979                         last_string = None
1980                     yield type, string, start, end, line
1981                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1982                     if not last_string:
1983                         last_string = string
1984                         last_start = start
1985                         last_end = end
1986                     else:
1987                         last_string += string
1988             if last_string:
1989                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1990
1991         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1992             selectors = []
1993             current_selector = None
1994             for type, string, start, _, _ in tokens:
1995                 # ENCODING is only defined in python 3.x
1996                 if type == getattr(tokenize, 'ENCODING', None):
1997                     continue
1998                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1999                     current_selector = FormatSelector(SINGLE, string, [])
2000                 elif type == tokenize.OP:
2001                     if string == ')':
2002                         if not inside_group:
2003                             # ')' will be handled by the parentheses group
2004                             tokens.restore_last_token()
2005                         break
2006                     elif inside_merge and string in ['/', ',']:
2007                         tokens.restore_last_token()
2008                         break
2009                     elif inside_choice and string == ',':
2010                         tokens.restore_last_token()
2011                         break
2012                     elif string == ',':
2013                         if not current_selector:
2014                             raise syntax_error('"," must follow a format selector', start)
2015                         selectors.append(current_selector)
2016                         current_selector = None
2017                     elif string == '/':
2018                         if not current_selector:
2019                             raise syntax_error('"/" must follow a format selector', start)
2020                         first_choice = current_selector
2021                         second_choice = _parse_format_selection(tokens, inside_choice=True)
2022                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2023                     elif string == '[':
2024                         if not current_selector:
2025                             current_selector = FormatSelector(SINGLE, 'best', [])
2026                         format_filter = _parse_filter(tokens)
2027                         current_selector.filters.append(format_filter)
2028                     elif string == '(':
2029                         if current_selector:
2030                             raise syntax_error('Unexpected "("', start)
2031                         group = _parse_format_selection(tokens, inside_group=True)
2032                         current_selector = FormatSelector(GROUP, group, [])
2033                     elif string == '+':
2034                         if not current_selector:
2035                             raise syntax_error('Unexpected "+"', start)
2036                         selector_1 = current_selector
2037                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2038                         if not selector_2:
2039                             raise syntax_error('Expected a selector', start)
2040                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2041                     else:
2042                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2043                 elif type == tokenize.ENDMARKER:
2044                     break
2045             if current_selector:
2046                 selectors.append(current_selector)
2047             return selectors
2048
2049         def _merge(formats_pair):
2050             format_1, format_2 = formats_pair
2051
2052             formats_info = []
2053             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2054             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2055
2056             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2057                 get_no_more = {'video': False, 'audio': False}
2058                 for (i, fmt_info) in enumerate(formats_info):
2059                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2060                         formats_info.pop(i)
2061                         continue
2062                     for aud_vid in ['audio', 'video']:
2063                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2064                             if get_no_more[aud_vid]:
2065                                 formats_info.pop(i)
2066                                 break
2067                             get_no_more[aud_vid] = True
2068
2069             if len(formats_info) == 1:
2070                 return formats_info[0]
2071
2072             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2073             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2074
2075             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2076             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2077
2078             output_ext = self.params.get('merge_output_format')
2079             if not output_ext:
2080                 if the_only_video:
2081                     output_ext = the_only_video['ext']
2082                 elif the_only_audio and not video_fmts:
2083                     output_ext = the_only_audio['ext']
2084                 else:
2085                     output_ext = 'mkv'
2086
2087             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2088
2089             new_dict = {
2090                 'requested_formats': formats_info,
2091                 'format': '+'.join(filtered('format')),
2092                 'format_id': '+'.join(filtered('format_id')),
2093                 'ext': output_ext,
2094                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2095                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2096                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2097                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2098                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2099             }
2100
2101             if the_only_video:
2102                 new_dict.update({
2103                     'width': the_only_video.get('width'),
2104                     'height': the_only_video.get('height'),
2105                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2106                     'fps': the_only_video.get('fps'),
2107                     'dynamic_range': the_only_video.get('dynamic_range'),
2108                     'vcodec': the_only_video.get('vcodec'),
2109                     'vbr': the_only_video.get('vbr'),
2110                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2111                 })
2112
2113             if the_only_audio:
2114                 new_dict.update({
2115                     'acodec': the_only_audio.get('acodec'),
2116                     'abr': the_only_audio.get('abr'),
2117                     'asr': the_only_audio.get('asr'),
2118                 })
2119
2120             return new_dict
2121
2122         def _check_formats(formats):
2123             if not check_formats:
2124                 yield from formats
2125                 return
2126             yield from self._check_formats(formats)
2127
2128         def _build_selector_function(selector):
2129             if isinstance(selector, list):  # ,
2130                 fs = [_build_selector_function(s) for s in selector]
2131
2132                 def selector_function(ctx):
2133                     for f in fs:
2134                         yield from f(ctx)
2135                 return selector_function
2136
2137             elif selector.type == GROUP:  # ()
2138                 selector_function = _build_selector_function(selector.selector)
2139
2140             elif selector.type == PICKFIRST:  # /
2141                 fs = [_build_selector_function(s) for s in selector.selector]
2142
2143                 def selector_function(ctx):
2144                     for f in fs:
2145                         picked_formats = list(f(ctx))
2146                         if picked_formats:
2147                             return picked_formats
2148                     return []
2149
2150             elif selector.type == MERGE:  # +
2151                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2152
2153                 def selector_function(ctx):
2154                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2155                         yield _merge(pair)
2156
2157             elif selector.type == SINGLE:  # atom
2158                 format_spec = selector.selector or 'best'
2159
2160                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2161                 if format_spec == 'all':
2162                     def selector_function(ctx):
2163                         yield from _check_formats(ctx['formats'][::-1])
2164                 elif format_spec == 'mergeall':
2165                     def selector_function(ctx):
2166                         formats = list(_check_formats(ctx['formats']))
2167                         if not formats:
2168                             return
2169                         merged_format = formats[-1]
2170                         for f in formats[-2::-1]:
2171                             merged_format = _merge((merged_format, f))
2172                         yield merged_format
2173
2174                 else:
2175                     format_fallback, format_reverse, format_idx = False, True, 1
2176                     mobj = re.match(
2177                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2178                         format_spec)
2179                     if mobj is not None:
2180                         format_idx = int_or_none(mobj.group('n'), default=1)
2181                         format_reverse = mobj.group('bw')[0] == 'b'
2182                         format_type = (mobj.group('type') or [None])[0]
2183                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2184                         format_modified = mobj.group('mod') is not None
2185
2186                         format_fallback = not format_type and not format_modified  # for b, w
2187                         _filter_f = (
2188                             (lambda f: f.get('%scodec' % format_type) != 'none')
2189                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2190                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2191                             if format_type  # bv, ba, wv, wa
2192                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2193                             if not format_modified  # b, w
2194                             else lambda f: True)  # b*, w*
2195                         filter_f = lambda f: _filter_f(f) and (
2196                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2197                     else:
2198                         if format_spec in self._format_selection_exts['audio']:
2199                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2200                         elif format_spec in self._format_selection_exts['video']:
2201                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2202                         elif format_spec in self._format_selection_exts['storyboards']:
2203                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2204                         else:
2205                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2206
2207                     def selector_function(ctx):
2208                         formats = list(ctx['formats'])
2209                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2210                         if format_fallback and ctx['incomplete_formats'] and not matches:
2211                             # for extractors with incomplete formats (audio only (soundcloud)
2212                             # or video only (imgur)) best/worst will fallback to
2213                             # best/worst {video,audio}-only format
2214                             matches = formats
2215                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2216                         try:
2217                             yield matches[format_idx - 1]
2218                         except IndexError:
2219                             return
2220
2221             filters = [self._build_format_filter(f) for f in selector.filters]
2222
2223             def final_selector(ctx):
2224                 ctx_copy = dict(ctx)
2225                 for _filter in filters:
2226                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2227                 return selector_function(ctx_copy)
2228             return final_selector
2229
2230         stream = io.BytesIO(format_spec.encode('utf-8'))
2231         try:
2232             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2233         except tokenize.TokenError:
2234             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2235
2236         class TokenIterator(object):
2237             def __init__(self, tokens):
2238                 self.tokens = tokens
2239                 self.counter = 0
2240
2241             def __iter__(self):
2242                 return self
2243
2244             def __next__(self):
2245                 if self.counter >= len(self.tokens):
2246                     raise StopIteration()
2247                 value = self.tokens[self.counter]
2248                 self.counter += 1
2249                 return value
2250
2251             next = __next__
2252
2253             def restore_last_token(self):
2254                 self.counter -= 1
2255
2256         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2257         return _build_selector_function(parsed_selector)
2258
2259     def _calc_headers(self, info_dict):
2260         res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2261
2262         cookies = self._calc_cookies(info_dict)
2263         if cookies:
2264             res['Cookie'] = cookies
2265
2266         if 'X-Forwarded-For' not in res:
2267             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2268             if x_forwarded_for_ip:
2269                 res['X-Forwarded-For'] = x_forwarded_for_ip
2270
2271         return res
2272
2273     def _calc_cookies(self, info_dict):
2274         pr = sanitized_Request(info_dict['url'])
2275         self.cookiejar.add_cookie_header(pr)
2276         return pr.get_header('Cookie')
2277
2278     def _sort_thumbnails(self, thumbnails):
2279         thumbnails.sort(key=lambda t: (
2280             t.get('preference') if t.get('preference') is not None else -1,
2281             t.get('width') if t.get('width') is not None else -1,
2282             t.get('height') if t.get('height') is not None else -1,
2283             t.get('id') if t.get('id') is not None else '',
2284             t.get('url')))
2285
2286     def _sanitize_thumbnails(self, info_dict):
2287         thumbnails = info_dict.get('thumbnails')
2288         if thumbnails is None:
2289             thumbnail = info_dict.get('thumbnail')
2290             if thumbnail:
2291                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2292         if not thumbnails:
2293             return
2294
2295         def check_thumbnails(thumbnails):
2296             for t in thumbnails:
2297                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2298                 try:
2299                     self.urlopen(HEADRequest(t['url']))
2300                 except network_exceptions as err:
2301                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2302                     continue
2303                 yield t
2304
2305         self._sort_thumbnails(thumbnails)
2306         for i, t in enumerate(thumbnails):
2307             if t.get('id') is None:
2308                 t['id'] = '%d' % i
2309             if t.get('width') and t.get('height'):
2310                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2311             t['url'] = sanitize_url(t['url'])
2312
2313         if self.params.get('check_formats') is True:
2314             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2315         else:
2316             info_dict['thumbnails'] = thumbnails
2317
2318     def _fill_common_fields(self, info_dict, is_video=True):
2319         # TODO: move sanitization here
2320         if is_video:
2321             # playlists are allowed to lack "title"
2322             info_dict['fulltitle'] = info_dict.get('title')
2323             if 'title' not in info_dict:
2324                 raise ExtractorError('Missing "title" field in extractor result',
2325                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2326             elif not info_dict.get('title'):
2327                 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2328                 info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2329
2330         if info_dict.get('duration') is not None:
2331             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2332
2333         for ts_key, date_key in (
2334                 ('timestamp', 'upload_date'),
2335                 ('release_timestamp', 'release_date'),
2336                 ('modified_timestamp', 'modified_date'),
2337         ):
2338             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2339                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2340                 # see http://bugs.python.org/issue1646728)
2341                 try:
2342                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2343                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2344                 except (ValueError, OverflowError, OSError):
2345                     pass
2346
2347         live_keys = ('is_live', 'was_live')
2348         live_status = info_dict.get('live_status')
2349         if live_status is None:
2350             for key in live_keys:
2351                 if info_dict.get(key) is False:
2352                     continue
2353                 if info_dict.get(key):
2354                     live_status = key
2355                 break
2356             if all(info_dict.get(key) is False for key in live_keys):
2357                 live_status = 'not_live'
2358         if live_status:
2359             info_dict['live_status'] = live_status
2360             for key in live_keys:
2361                 if info_dict.get(key) is None:
2362                     info_dict[key] = (live_status == key)
2363
2364         # Auto generate title fields corresponding to the *_number fields when missing
2365         # in order to always have clean titles. This is very common for TV series.
2366         for field in ('chapter', 'season', 'episode'):
2367             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2368                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2369
2370     def process_video_result(self, info_dict, download=True):
2371         assert info_dict.get('_type', 'video') == 'video'
2372         self._num_videos += 1
2373
2374         if 'id' not in info_dict:
2375             raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2376         elif not info_dict.get('id'):
2377             raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2378
2379         def report_force_conversion(field, field_not, conversion):
2380             self.report_warning(
2381                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2382                 % (field, field_not, conversion))
2383
2384         def sanitize_string_field(info, string_field):
2385             field = info.get(string_field)
2386             if field is None or isinstance(field, compat_str):
2387                 return
2388             report_force_conversion(string_field, 'a string', 'string')
2389             info[string_field] = compat_str(field)
2390
2391         def sanitize_numeric_fields(info):
2392             for numeric_field in self._NUMERIC_FIELDS:
2393                 field = info.get(numeric_field)
2394                 if field is None or isinstance(field, compat_numeric_types):
2395                     continue
2396                 report_force_conversion(numeric_field, 'numeric', 'int')
2397                 info[numeric_field] = int_or_none(field)
2398
2399         sanitize_string_field(info_dict, 'id')
2400         sanitize_numeric_fields(info_dict)
2401         if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2402             self.report_warning('"duration" field is negative, there is an error in extractor')
2403
2404         if 'playlist' not in info_dict:
2405             # It isn't part of a playlist
2406             info_dict['playlist'] = None
2407             info_dict['playlist_index'] = None
2408
2409         self._sanitize_thumbnails(info_dict)
2410
2411         thumbnail = info_dict.get('thumbnail')
2412         thumbnails = info_dict.get('thumbnails')
2413         if thumbnail:
2414             info_dict['thumbnail'] = sanitize_url(thumbnail)
2415         elif thumbnails:
2416             info_dict['thumbnail'] = thumbnails[-1]['url']
2417
2418         if info_dict.get('display_id') is None and 'id' in info_dict:
2419             info_dict['display_id'] = info_dict['id']
2420
2421         self._fill_common_fields(info_dict)
2422
2423         for cc_kind in ('subtitles', 'automatic_captions'):
2424             cc = info_dict.get(cc_kind)
2425             if cc:
2426                 for _, subtitle in cc.items():
2427                     for subtitle_format in subtitle:
2428                         if subtitle_format.get('url'):
2429                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2430                         if subtitle_format.get('ext') is None:
2431                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2432
2433         automatic_captions = info_dict.get('automatic_captions')
2434         subtitles = info_dict.get('subtitles')
2435
2436         info_dict['requested_subtitles'] = self.process_subtitles(
2437             info_dict['id'], subtitles, automatic_captions)
2438
2439         if info_dict.get('formats') is None:
2440             # There's only one format available
2441             formats = [info_dict]
2442         else:
2443             formats = info_dict['formats']
2444
2445         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2446         if not self.params.get('allow_unplayable_formats'):
2447             formats = [f for f in formats if not f.get('has_drm')]
2448
2449         get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2450         if not get_from_start:
2451             info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2452         if info_dict.get('is_live') and formats:
2453             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2454             if get_from_start and not formats:
2455                 self.raise_no_formats(info_dict, msg='--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2456                                                      'If you want to download from the current time, pass --no-live-from-start')
2457
2458         if not formats:
2459             self.raise_no_formats(info_dict)
2460
2461         def is_wellformed(f):
2462             url = f.get('url')
2463             if not url:
2464                 self.report_warning(
2465                     '"url" field is missing or empty - skipping format, '
2466                     'there is an error in extractor')
2467                 return False
2468             if isinstance(url, bytes):
2469                 sanitize_string_field(f, 'url')
2470             return True
2471
2472         # Filter out malformed formats for better extraction robustness
2473         formats = list(filter(is_wellformed, formats))
2474
2475         formats_dict = {}
2476
2477         # We check that all the formats have the format and format_id fields
2478         for i, format in enumerate(formats):
2479             sanitize_string_field(format, 'format_id')
2480             sanitize_numeric_fields(format)
2481             format['url'] = sanitize_url(format['url'])
2482             if not format.get('format_id'):
2483                 format['format_id'] = compat_str(i)
2484             else:
2485                 # Sanitize format_id from characters used in format selector expression
2486                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2487             format_id = format['format_id']
2488             if format_id not in formats_dict:
2489                 formats_dict[format_id] = []
2490             formats_dict[format_id].append(format)
2491
2492         # Make sure all formats have unique format_id
2493         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2494         for format_id, ambiguous_formats in formats_dict.items():
2495             ambigious_id = len(ambiguous_formats) > 1
2496             for i, format in enumerate(ambiguous_formats):
2497                 if ambigious_id:
2498                     format['format_id'] = '%s-%d' % (format_id, i)
2499                 if format.get('ext') is None:
2500                     format['ext'] = determine_ext(format['url']).lower()
2501                 # Ensure there is no conflict between id and ext in format selection
2502                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2503                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2504                     format['format_id'] = 'f%s' % format['format_id']
2505
2506         for i, format in enumerate(formats):
2507             if format.get('format') is None:
2508                 format['format'] = '{id} - {res}{note}'.format(
2509                     id=format['format_id'],
2510                     res=self.format_resolution(format),
2511                     note=format_field(format, 'format_note', ' (%s)'),
2512                 )
2513             if format.get('protocol') is None:
2514                 format['protocol'] = determine_protocol(format)
2515             if format.get('resolution') is None:
2516                 format['resolution'] = self.format_resolution(format, default=None)
2517             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2518                 format['dynamic_range'] = 'SDR'
2519             if (info_dict.get('duration') and format.get('tbr')
2520                     and not format.get('filesize') and not format.get('filesize_approx')):
2521                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2522
2523             # Add HTTP headers, so that external programs can use them from the
2524             # json output
2525             full_format_info = info_dict.copy()
2526             full_format_info.update(format)
2527             format['http_headers'] = self._calc_headers(full_format_info)
2528         # Remove private housekeeping stuff
2529         if '__x_forwarded_for_ip' in info_dict:
2530             del info_dict['__x_forwarded_for_ip']
2531
2532         if self.params.get('check_formats') is True:
2533             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2534
2535         if not formats or formats[0] is not info_dict:
2536             # only set the 'formats' fields if the original info_dict list them
2537             # otherwise we end up with a circular reference, the first (and unique)
2538             # element in the 'formats' field in info_dict is info_dict itself,
2539             # which can't be exported to json
2540             info_dict['formats'] = formats
2541
2542         info_dict, _ = self.pre_process(info_dict)
2543
2544         if self._match_entry(info_dict) is not None:
2545             return info_dict
2546
2547         self.post_extract(info_dict)
2548         info_dict, _ = self.pre_process(info_dict, 'after_filter')
2549
2550         # The pre-processors may have modified the formats
2551         formats = info_dict.get('formats', [info_dict])
2552
2553         list_only = self.params.get('simulate') is None and (
2554             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2555         interactive_format_selection = not list_only and self.format_selector == '-'
2556         if self.params.get('list_thumbnails'):
2557             self.list_thumbnails(info_dict)
2558         if self.params.get('listsubtitles'):
2559             if 'automatic_captions' in info_dict:
2560                 self.list_subtitles(
2561                     info_dict['id'], automatic_captions, 'automatic captions')
2562             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2563         if self.params.get('listformats') or interactive_format_selection:
2564             self.list_formats(info_dict)
2565         if list_only:
2566             # Without this printing, -F --print-json will not work
2567             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2568             return
2569
2570         format_selector = self.format_selector
2571         if format_selector is None:
2572             req_format = self._default_format_spec(info_dict, download=download)
2573             self.write_debug('Default format spec: %s' % req_format)
2574             format_selector = self.build_format_selector(req_format)
2575
2576         while True:
2577             if interactive_format_selection:
2578                 req_format = input(
2579                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2580                 try:
2581                     format_selector = self.build_format_selector(req_format)
2582                 except SyntaxError as err:
2583                     self.report_error(err, tb=False, is_error=False)
2584                     continue
2585
2586             # While in format selection we may need to have an access to the original
2587             # format set in order to calculate some metrics or do some processing.
2588             # For now we need to be able to guess whether original formats provided
2589             # by extractor are incomplete or not (i.e. whether extractor provides only
2590             # video-only or audio-only formats) for proper formats selection for
2591             # extractors with such incomplete formats (see
2592             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2593             # Since formats may be filtered during format selection and may not match
2594             # the original formats the results may be incorrect. Thus original formats
2595             # or pre-calculated metrics should be passed to format selection routines
2596             # as well.
2597             # We will pass a context object containing all necessary additional data
2598             # instead of just formats.
2599             # This fixes incorrect format selection issue (see
2600             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2601             incomplete_formats = (
2602                 # All formats are video-only or
2603                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2604                 # all formats are audio-only
2605                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2606
2607             ctx = {
2608                 'formats': formats,
2609                 'incomplete_formats': incomplete_formats,
2610             }
2611
2612             formats_to_download = list(format_selector(ctx))
2613             if interactive_format_selection and not formats_to_download:
2614                 self.report_error('Requested format is not available', tb=False, is_error=False)
2615                 continue
2616             break
2617
2618         if not formats_to_download:
2619             if not self.params.get('ignore_no_formats_error'):
2620                 raise ExtractorError('Requested format is not available', expected=True,
2621                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2622             self.report_warning('Requested format is not available')
2623             # Process what we can, even without any available formats.
2624             formats_to_download = [{}]
2625
2626         best_format = formats_to_download[-1]
2627         if download:
2628             if best_format:
2629                 self.to_screen(
2630                     f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2631                     + ', '.join([f['format_id'] for f in formats_to_download]))
2632             max_downloads_reached = False
2633             for i, fmt in enumerate(formats_to_download):
2634                 formats_to_download[i] = new_info = self._copy_infodict(info_dict)
2635                 new_info.update(fmt)
2636                 try:
2637                     self.process_info(new_info)
2638                 except MaxDownloadsReached:
2639                     max_downloads_reached = True
2640                 # Remove copied info
2641                 for key, val in tuple(new_info.items()):
2642                     if info_dict.get(key) == val:
2643                         new_info.pop(key)
2644                 if max_downloads_reached:
2645                     break
2646
2647             write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2648             assert write_archive.issubset({True, False, 'ignore'})
2649             if True in write_archive and False not in write_archive:
2650                 self.record_download_archive(info_dict)
2651
2652             info_dict['requested_downloads'] = formats_to_download
2653             info_dict = self.run_all_pps('after_video', info_dict)
2654             if max_downloads_reached:
2655                 raise MaxDownloadsReached()
2656
2657         # We update the info dict with the selected best quality format (backwards compatibility)
2658         info_dict.update(best_format)
2659         return info_dict
2660
2661     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2662         """Select the requested subtitles and their format"""
2663         available_subs = {}
2664         if normal_subtitles and self.params.get('writesubtitles'):
2665             available_subs.update(normal_subtitles)
2666         if automatic_captions and self.params.get('writeautomaticsub'):
2667             for lang, cap_info in automatic_captions.items():
2668                 if lang not in available_subs:
2669                     available_subs[lang] = cap_info
2670
2671         if (not self.params.get('writesubtitles') and not
2672                 self.params.get('writeautomaticsub') or not
2673                 available_subs):
2674             return None
2675
2676         all_sub_langs = available_subs.keys()
2677         if self.params.get('allsubtitles', False):
2678             requested_langs = all_sub_langs
2679         elif self.params.get('subtitleslangs', False):
2680             # A list is used so that the order of languages will be the same as
2681             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2682             requested_langs = []
2683             for lang_re in self.params.get('subtitleslangs'):
2684                 discard = lang_re[0] == '-'
2685                 if discard:
2686                     lang_re = lang_re[1:]
2687                 if lang_re == 'all':
2688                     if discard:
2689                         requested_langs = []
2690                     else:
2691                         requested_langs.extend(all_sub_langs)
2692                     continue
2693                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2694                 if discard:
2695                     for lang in current_langs:
2696                         while lang in requested_langs:
2697                             requested_langs.remove(lang)
2698                 else:
2699                     requested_langs.extend(current_langs)
2700             requested_langs = orderedSet(requested_langs)
2701         elif 'en' in available_subs:
2702             requested_langs = ['en']
2703         else:
2704             requested_langs = [list(all_sub_langs)[0]]
2705         if requested_langs:
2706             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2707
2708         formats_query = self.params.get('subtitlesformat', 'best')
2709         formats_preference = formats_query.split('/') if formats_query else []
2710         subs = {}
2711         for lang in requested_langs:
2712             formats = available_subs.get(lang)
2713             if formats is None:
2714                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2715                 continue
2716             for ext in formats_preference:
2717                 if ext == 'best':
2718                     f = formats[-1]
2719                     break
2720                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2721                 if matches:
2722                     f = matches[-1]
2723                     break
2724             else:
2725                 f = formats[-1]
2726                 self.report_warning(
2727                     'No subtitle format found matching "%s" for language %s, '
2728                     'using %s' % (formats_query, lang, f['ext']))
2729             subs[lang] = f
2730         return subs
2731
2732     def _forceprint(self, key, info_dict):
2733         if info_dict is None:
2734             return
2735         info_copy = info_dict.copy()
2736         info_copy['formats_table'] = self.render_formats_table(info_dict)
2737         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2738         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2739         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2740
2741         def format_tmpl(tmpl):
2742             mobj = re.match(r'\w+(=?)$', tmpl)
2743             if mobj and mobj.group(1):
2744                 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2745             elif mobj:
2746                 return f'%({tmpl})s'
2747             return tmpl
2748
2749         for tmpl in self.params['forceprint'].get(key, []):
2750             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2751
2752         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2753             filename = self.evaluate_outtmpl(file_tmpl, info_dict)
2754             tmpl = format_tmpl(tmpl)
2755             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2756             if self._ensure_dir_exists(filename):
2757                 with io.open(filename, 'a', encoding='utf-8') as f:
2758                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2759
2760     def __forced_printings(self, info_dict, filename, incomplete):
2761         def print_mandatory(field, actual_field=None):
2762             if actual_field is None:
2763                 actual_field = field
2764             if (self.params.get('force%s' % field, False)
2765                     and (not incomplete or info_dict.get(actual_field) is not None)):
2766                 self.to_stdout(info_dict[actual_field])
2767
2768         def print_optional(field):
2769             if (self.params.get('force%s' % field, False)
2770                     and info_dict.get(field) is not None):
2771                 self.to_stdout(info_dict[field])
2772
2773         info_dict = info_dict.copy()
2774         if filename is not None:
2775             info_dict['filename'] = filename
2776         if info_dict.get('requested_formats') is not None:
2777             # For RTMP URLs, also include the playpath
2778             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2779         elif info_dict.get('url'):
2780             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2781
2782         if (self.params.get('forcejson')
2783                 or self.params['forceprint'].get('video')
2784                 or self.params['print_to_file'].get('video')):
2785             self.post_extract(info_dict)
2786         self._forceprint('video', info_dict)
2787
2788         print_mandatory('title')
2789         print_mandatory('id')
2790         print_mandatory('url', 'urls')
2791         print_optional('thumbnail')
2792         print_optional('description')
2793         print_optional('filename')
2794         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2795             self.to_stdout(formatSeconds(info_dict['duration']))
2796         print_mandatory('format')
2797
2798         if self.params.get('forcejson'):
2799             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2800
2801     def dl(self, name, info, subtitle=False, test=False):
2802         if not info.get('url'):
2803             self.raise_no_formats(info, True)
2804
2805         if test:
2806             verbose = self.params.get('verbose')
2807             params = {
2808                 'test': True,
2809                 'quiet': self.params.get('quiet') or not verbose,
2810                 'verbose': verbose,
2811                 'noprogress': not verbose,
2812                 'nopart': True,
2813                 'skip_unavailable_fragments': False,
2814                 'keep_fragments': False,
2815                 'overwrites': True,
2816                 '_no_ytdl_file': True,
2817             }
2818         else:
2819             params = self.params
2820         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2821         if not test:
2822             for ph in self._progress_hooks:
2823                 fd.add_progress_hook(ph)
2824             urls = '", "'.join(
2825                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2826                 for f in info.get('requested_formats', []) or [info])
2827             self.write_debug('Invoking downloader on "%s"' % urls)
2828
2829         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2830         # But it may contain objects that are not deep-copyable
2831         new_info = self._copy_infodict(info)
2832         if new_info.get('http_headers') is None:
2833             new_info['http_headers'] = self._calc_headers(new_info)
2834         return fd.download(name, new_info, subtitle)
2835
2836     def existing_file(self, filepaths, *, default_overwrite=True):
2837         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2838         if existing_files and not self.params.get('overwrites', default_overwrite):
2839             return existing_files[0]
2840
2841         for file in existing_files:
2842             self.report_file_delete(file)
2843             os.remove(file)
2844         return None
2845
2846     def process_info(self, info_dict):
2847         """Process a single resolved IE result. (Modifies it in-place)"""
2848
2849         assert info_dict.get('_type', 'video') == 'video'
2850         original_infodict = info_dict
2851
2852         if 'format' not in info_dict and 'ext' in info_dict:
2853             info_dict['format'] = info_dict['ext']
2854
2855         # This is mostly just for backward compatibility of process_info
2856         # As a side-effect, this allows for format-specific filters
2857         if self._match_entry(info_dict) is not None:
2858             info_dict['__write_download_archive'] = 'ignore'
2859             return
2860
2861         # Does nothing under normal operation - for backward compatibility of process_info
2862         self.post_extract(info_dict)
2863         self._num_downloads += 1
2864
2865         # info_dict['_filename'] needs to be set for backward compatibility
2866         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2867         temp_filename = self.prepare_filename(info_dict, 'temp')
2868         files_to_move = {}
2869
2870         # Forced printings
2871         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2872
2873         if self.params.get('simulate'):
2874             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2875             return
2876
2877         if full_filename is None:
2878             return
2879         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2880             return
2881         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2882             return
2883
2884         if self._write_description('video', info_dict,
2885                                    self.prepare_filename(info_dict, 'description')) is None:
2886             return
2887
2888         sub_files = self._write_subtitles(info_dict, temp_filename)
2889         if sub_files is None:
2890             return
2891         files_to_move.update(dict(sub_files))
2892
2893         thumb_files = self._write_thumbnails(
2894             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2895         if thumb_files is None:
2896             return
2897         files_to_move.update(dict(thumb_files))
2898
2899         infofn = self.prepare_filename(info_dict, 'infojson')
2900         _infojson_written = self._write_info_json('video', info_dict, infofn)
2901         if _infojson_written:
2902             info_dict['infojson_filename'] = infofn
2903             # For backward compatibility, even though it was a private field
2904             info_dict['__infojson_filename'] = infofn
2905         elif _infojson_written is None:
2906             return
2907
2908         # Note: Annotations are deprecated
2909         annofn = None
2910         if self.params.get('writeannotations', False):
2911             annofn = self.prepare_filename(info_dict, 'annotation')
2912         if annofn:
2913             if not self._ensure_dir_exists(encodeFilename(annofn)):
2914                 return
2915             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2916                 self.to_screen('[info] Video annotations are already present')
2917             elif not info_dict.get('annotations'):
2918                 self.report_warning('There are no annotations to write.')
2919             else:
2920                 try:
2921                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2922                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2923                         annofile.write(info_dict['annotations'])
2924                 except (KeyError, TypeError):
2925                     self.report_warning('There are no annotations to write.')
2926                 except (OSError, IOError):
2927                     self.report_error('Cannot write annotations file: ' + annofn)
2928                     return
2929
2930         # Write internet shortcut files
2931         def _write_link_file(link_type):
2932             url = try_get(info_dict['webpage_url'], iri_to_uri)
2933             if not url:
2934                 self.report_warning(
2935                     f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2936                 return True
2937             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2938             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2939                 return False
2940             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2941                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2942                 return True
2943             try:
2944                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2945                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2946                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2947                     template_vars = {'url': url}
2948                     if link_type == 'desktop':
2949                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2950                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2951             except (OSError, IOError):
2952                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2953                 return False
2954             return True
2955
2956         write_links = {
2957             'url': self.params.get('writeurllink'),
2958             'webloc': self.params.get('writewebloclink'),
2959             'desktop': self.params.get('writedesktoplink'),
2960         }
2961         if self.params.get('writelink'):
2962             link_type = ('webloc' if sys.platform == 'darwin'
2963                          else 'desktop' if sys.platform.startswith('linux')
2964                          else 'url')
2965             write_links[link_type] = True
2966
2967         if any(should_write and not _write_link_file(link_type)
2968                for link_type, should_write in write_links.items()):
2969             return
2970
2971         def replace_info_dict(new_info):
2972             nonlocal info_dict
2973             if new_info == info_dict:
2974                 return
2975             info_dict.clear()
2976             info_dict.update(new_info)
2977
2978         try:
2979             new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2980             replace_info_dict(new_info)
2981         except PostProcessingError as err:
2982             self.report_error('Preprocessing: %s' % str(err))
2983             return
2984
2985         if self.params.get('skip_download'):
2986             info_dict['filepath'] = temp_filename
2987             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2988             info_dict['__files_to_move'] = files_to_move
2989             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2990             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2991         else:
2992             # Download
2993             info_dict.setdefault('__postprocessors', [])
2994             try:
2995
2996                 def existing_video_file(*filepaths):
2997                     ext = info_dict.get('ext')
2998                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
2999                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3000                                               default_overwrite=False)
3001                     if file:
3002                         info_dict['ext'] = os.path.splitext(file)[1][1:]
3003                     return file
3004
3005                 success = True
3006                 if info_dict.get('requested_formats') is not None:
3007
3008                     def compatible_formats(formats):
3009                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3010                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
3011                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
3012                         if len(video_formats) > 2 or len(audio_formats) > 2:
3013                             return False
3014
3015                         # Check extension
3016                         exts = set(format.get('ext') for format in formats)
3017                         COMPATIBLE_EXTS = (
3018                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
3019                             set(('webm',)),
3020                         )
3021                         for ext_sets in COMPATIBLE_EXTS:
3022                             if ext_sets.issuperset(exts):
3023                                 return True
3024                         # TODO: Check acodec/vcodec
3025                         return False
3026
3027                     requested_formats = info_dict['requested_formats']
3028                     old_ext = info_dict['ext']
3029                     if self.params.get('merge_output_format') is None:
3030                         if not compatible_formats(requested_formats):
3031                             info_dict['ext'] = 'mkv'
3032                             self.report_warning(
3033                                 'Requested formats are incompatible for merge and will be merged into mkv')
3034                         if (info_dict['ext'] == 'webm'
3035                                 and info_dict.get('thumbnails')
3036                                 # check with type instead of pp_key, __name__, or isinstance
                                # since we don't want any custom PPs to trigger this
3038                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3039                             info_dict['ext'] = 'mkv'
3040                             self.report_warning(
3041                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3042                     new_ext = info_dict['ext']
3043
3044                     def correct_ext(filename, ext=new_ext):
3045                         if filename == '-':
3046                             return filename
3047                         filename_real_ext = os.path.splitext(filename)[1][1:]
3048                         filename_wo_ext = (
3049                             os.path.splitext(filename)[0]
3050                             if filename_real_ext in (old_ext, new_ext)
3051                             else filename)
3052                         return '%s.%s' % (filename_wo_ext, ext)
3053
3054                     # Ensure filename always has a correct extension for successful merge
3055                     full_filename = correct_ext(full_filename)
3056                     temp_filename = correct_ext(temp_filename)
3057                     dl_filename = existing_video_file(full_filename, temp_filename)
3058                     info_dict['__real_download'] = False
3059
3060                     downloaded = []
3061                     merger = FFmpegMergerPP(self)
3062
3063                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3064                     if dl_filename is not None:
3065                         self.report_file_already_downloaded(dl_filename)
3066                     elif fd:
3067                         for f in requested_formats if fd != FFmpegFD else []:
3068                             f['filepath'] = fname = prepend_extension(
3069                                 correct_ext(temp_filename, info_dict['ext']),
3070                                 'f%s' % f['format_id'], info_dict['ext'])
3071                             downloaded.append(fname)
3072                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3073                         success, real_download = self.dl(temp_filename, info_dict)
3074                         info_dict['__real_download'] = real_download
3075                     else:
3076                         if self.params.get('allow_unplayable_formats'):
3077                             self.report_warning(
3078                                 'You have requested merging of multiple formats '
3079                                 'while also allowing unplayable formats to be downloaded. '
3080                                 'The formats won\'t be merged to prevent data corruption.')
3081                         elif not merger.available:
3082                             msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3083                             if not self.params.get('ignoreerrors'):
3084                                 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3085                                 return
3086                             self.report_warning(f'{msg}. The formats won\'t be merged')
3087
3088                         if temp_filename == '-':
3089                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3090                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3091                                       else 'but ffmpeg is not installed')
3092                             self.report_warning(
3093                                 f'You have requested downloading multiple formats to stdout {reason}. '
3094                                 'The formats will be streamed one after the other')
3095                             fname = temp_filename
3096                         for f in requested_formats:
3097                             new_info = dict(info_dict)
3098                             del new_info['requested_formats']
3099                             new_info.update(f)
3100                             if temp_filename != '-':
3101                                 fname = prepend_extension(
3102                                     correct_ext(temp_filename, new_info['ext']),
3103                                     'f%s' % f['format_id'], new_info['ext'])
3104                                 if not self._ensure_dir_exists(fname):
3105                                     return
3106                                 f['filepath'] = fname
3107                                 downloaded.append(fname)
3108                             partial_success, real_download = self.dl(fname, new_info)
3109                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3110                             success = success and partial_success
3111
3112                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3113                         info_dict['__postprocessors'].append(merger)
3114                         info_dict['__files_to_merge'] = downloaded
3115                         # Even if there were no downloads, it is being merged only now
3116                         info_dict['__real_download'] = True
3117                     else:
3118                         for file in downloaded:
3119                             files_to_move[file] = None
3120                 else:
3121                     # Just a single file
3122                     dl_filename = existing_video_file(full_filename, temp_filename)
3123                     if dl_filename is None or dl_filename == temp_filename:
3124                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3125                         # So we should try to resume the download
3126                         success, real_download = self.dl(temp_filename, info_dict)
3127                         info_dict['__real_download'] = real_download
3128                     else:
3129                         self.report_file_already_downloaded(dl_filename)
3130
3131                 dl_filename = dl_filename or temp_filename
3132                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3133
3134             except network_exceptions as err:
3135                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3136                 return
3137             except (OSError, IOError) as err:
3138                 raise UnavailableVideoError(err)
3139             except (ContentTooShortError, ) as err:
3140                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3141                 return
3142
3143             if success and full_filename != '-':
3144
3145                 def fixup():
3146                     do_fixup = True
3147                     fixup_policy = self.params.get('fixup')
3148                     vid = info_dict['id']
3149
3150                     if fixup_policy in ('ignore', 'never'):
3151                         return
3152                     elif fixup_policy == 'warn':
3153                         do_fixup = False
3154                     elif fixup_policy != 'force':
3155                         assert fixup_policy in ('detect_or_warn', None)
3156                         if not info_dict.get('__real_download'):
3157                             do_fixup = False
3158
3159                     def ffmpeg_fixup(cndn, msg, cls):
3160                         if not cndn:
3161                             return
3162                         if not do_fixup:
3163                             self.report_warning(f'{vid}: {msg}')
3164                             return
3165                         pp = cls(self)
3166                         if pp.available:
3167                             info_dict['__postprocessors'].append(pp)
3168                         else:
3169                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3170
3171                     stretched_ratio = info_dict.get('stretched_ratio')
3172                     ffmpeg_fixup(
3173                         stretched_ratio not in (1, None),
3174                         f'Non-uniform pixel ratio {stretched_ratio}',
3175                         FFmpegFixupStretchedPP)
3176
3177                     ffmpeg_fixup(
3178                         (info_dict.get('requested_formats') is None
3179                          and info_dict.get('container') == 'm4a_dash'
3180                          and info_dict.get('ext') == 'm4a'),
3181                         'writing DASH m4a. Only some players support this container',
3182                         FFmpegFixupM4aPP)
3183
3184                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3185                     downloader = downloader.__name__ if downloader else None
3186
3187                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3188                         ffmpeg_fixup(downloader == 'HlsFD',
3189                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3190                                      FFmpegFixupM3u8PP)
3191                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3192                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3193
3194                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3195                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3196
3197                 fixup()
3198                 try:
3199                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3200                 except PostProcessingError as err:
3201                     self.report_error('Postprocessing: %s' % str(err))
3202                     return
3203                 try:
3204                     for ph in self._post_hooks:
3205                         ph(info_dict['filepath'])
3206                 except Exception as err:
3207                     self.report_error('post hooks: %s' % str(err))
3208                     return
3209                 info_dict['__write_download_archive'] = True
3210
3211         if self.params.get('force_write_download_archive'):
3212             info_dict['__write_download_archive'] = True
3213
3214         # Make sure the info_dict was modified in-place
3215         assert info_dict is original_infodict
3216
3217         max_downloads = self.params.get('max_downloads')
3218         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3219             raise MaxDownloadsReached()
3220
3221     def __download_wrapper(self, func):
3222         @functools.wraps(func)
3223         def wrapper(*args, **kwargs):
3224             try:
3225                 res = func(*args, **kwargs)
3226             except UnavailableVideoError as e:
3227                 self.report_error(e)
3228             except MaxDownloadsReached as e:
3229                 self.to_screen(f'[info] {e}')
3230                 raise
3231             except DownloadCancelled as e:
3232                 self.to_screen(f'[info] {e}')
3233                 if not self.params.get('break_per_url'):
3234                     raise
3235             else:
3236                 if self.params.get('dump_single_json', False):
3237                     self.post_extract(res)
3238                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3239         return wrapper
3240
3241     def download(self, url_list):
3242         """Download a given list of URLs."""
3243         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3244         outtmpl = self.outtmpl_dict['default']
3245         if (len(url_list) > 1
3246                 and outtmpl != '-'
3247                 and '%' not in outtmpl
3248                 and self.params.get('max_downloads') != 1):
3249             raise SameFileError(outtmpl)
3250
3251         for url in url_list:
3252             self.__download_wrapper(self.extract_info)(
3253                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3254
3255         return self._download_retcode
3256
3257     def download_with_info_file(self, info_filename):
3258         with contextlib.closing(fileinput.FileInput(
3259                 [info_filename], mode='r',
3260                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3261             # FileInput doesn't have a read method, we can't call json.load
3262             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3263         try:
3264             self.__download_wrapper(self.process_ie_result)(info, download=True)
3265         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3266             if not isinstance(e, EntryNotInPlaylist):
3267                 self.to_stderr('\r')
3268             webpage_url = info.get('webpage_url')
3269             if webpage_url is not None:
3270                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3271                 return self.download([webpage_url])
3272             else:
3273                 raise
3274         return self._download_retcode
3275
3276     @staticmethod
3277     def sanitize_info(info_dict, remove_private_keys=False):
3278         ''' Sanitize the infodict for converting to json '''
3279         if info_dict is None:
3280             return info_dict
3281         info_dict.setdefault('epoch', int(time.time()))
3282         info_dict.setdefault('_type', 'video')
3283
3284         if remove_private_keys:
3285             reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in {
3286                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3287                 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3288             }
3289         else:
3290             reject = lambda k, v: False
3291
3292         def filter_fn(obj):
3293             if isinstance(obj, dict):
3294                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3295             elif isinstance(obj, (list, tuple, set, LazyList)):
3296                 return list(map(filter_fn, obj))
3297             elif obj is None or isinstance(obj, (str, int, float, bool)):
3298                 return obj
3299             else:
3300                 return repr(obj)
3301
3302         return filter_fn(info_dict)
3303
3304     @staticmethod
3305     def filter_requested_info(info_dict, actually_filter=True):
3306         ''' Alias of sanitize_info for backward compatibility '''
3307         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3308
3309     @staticmethod
3310     def post_extract(info_dict):
3311         def actual_post_extract(info_dict):
3312             if info_dict.get('_type') in ('playlist', 'multi_video'):
3313                 for video_dict in info_dict.get('entries', {}):
3314                     actual_post_extract(video_dict or {})
3315                 return
3316
3317             post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3318             info_dict.update(post_extractor())
3319
3320         actual_post_extract(info_dict or {})
3321
3322     def run_pp(self, pp, infodict):
3323         files_to_delete = []
3324         if '__files_to_move' not in infodict:
3325             infodict['__files_to_move'] = {}
3326         try:
3327             files_to_delete, infodict = pp.run(infodict)
3328         except PostProcessingError as e:
3329             # Must be True and not 'only_download'
3330             if self.params.get('ignoreerrors') is True:
3331                 self.report_error(e)
3332                 return infodict
3333             raise
3334
3335         if not files_to_delete:
3336             return infodict
3337         if self.params.get('keepvideo', False):
3338             for f in files_to_delete:
3339                 infodict['__files_to_move'].setdefault(f, '')
3340         else:
3341             for old_filename in set(files_to_delete):
3342                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3343                 try:
3344                     os.remove(encodeFilename(old_filename))
3345                 except (IOError, OSError):
3346                     self.report_warning('Unable to remove downloaded original file')
3347                 if old_filename in infodict['__files_to_move']:
3348                     del infodict['__files_to_move'][old_filename]
3349         return infodict
3350
3351     def run_all_pps(self, key, info, *, additional_pps=None):
3352         self._forceprint(key, info)
3353         for pp in (additional_pps or []) + self._pps[key]:
3354             info = self.run_pp(pp, info)
3355         return info
3356
3357     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3358         info = dict(ie_info)
3359         info['__files_to_move'] = files_to_move or {}
3360         info = self.run_all_pps(key, info)
3361         return info, info.pop('__files_to_move', None)
3362
3363     def post_process(self, filename, info, files_to_move=None):
3364         """Run all the postprocessors on the given file."""
3365         info['filepath'] = filename
3366         info['__files_to_move'] = files_to_move or {}
3367         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3368         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3369         del info['__files_to_move']
3370         return self.run_all_pps('after_move', info)
3371
3372     def _make_archive_id(self, info_dict):
3373         video_id = info_dict.get('id')
3374         if not video_id:
3375             return
3376         # Future-proof against any change in case
3377         # and backwards compatibility with prior versions
3378         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3379         if extractor is None:
3380             url = str_or_none(info_dict.get('url'))
3381             if not url:
3382                 return
3383             # Try to find matching extractor for the URL and take its ie_key
3384             for ie_key, ie in self._ies.items():
3385                 if ie.suitable(url):
3386                     extractor = ie_key
3387                     break
3388             else:
3389                 return
3390         return '%s %s' % (extractor.lower(), video_id)
3391
3392     def in_download_archive(self, info_dict):
3393         fn = self.params.get('download_archive')
3394         if fn is None:
3395             return False
3396
3397         vid_id = self._make_archive_id(info_dict)
3398         if not vid_id:
3399             return False  # Incomplete video information
3400
3401         return vid_id in self.archive
3402
3403     def record_download_archive(self, info_dict):
3404         fn = self.params.get('download_archive')
3405         if fn is None:
3406             return
3407         vid_id = self._make_archive_id(info_dict)
3408         assert vid_id
3409         self.write_debug(f'Adding to archive: {vid_id}')
3410         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3411             archive_file.write(vid_id + '\n')
3412         self.archive.add(vid_id)
3413
3414     @staticmethod
3415     def format_resolution(format, default='unknown'):
3416         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3417             return 'audio only'
3418         if format.get('resolution') is not None:
3419             return format['resolution']
3420         if format.get('width') and format.get('height'):
3421             return '%dx%d' % (format['width'], format['height'])
3422         elif format.get('height'):
3423             return '%sp' % format['height']
3424         elif format.get('width'):
3425             return '%dx?' % format['width']
3426         return default
3427
3428     def _list_format_headers(self, *headers):
3429         if self.params.get('listformats_table', True) is not False:
3430             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3431         return headers
3432
3433     def _format_note(self, fdict):
3434         res = ''
3435         if fdict.get('ext') in ['f4f', 'f4m']:
3436             res += '(unsupported)'
3437         if fdict.get('language'):
3438             if res:
3439                 res += ' '
3440             res += '[%s]' % fdict['language']
3441         if fdict.get('format_note') is not None:
3442             if res:
3443                 res += ' '
3444             res += fdict['format_note']
3445         if fdict.get('tbr') is not None:
3446             if res:
3447                 res += ', '
3448             res += '%4dk' % fdict['tbr']
3449         if fdict.get('container') is not None:
3450             if res:
3451                 res += ', '
3452             res += '%s container' % fdict['container']
3453         if (fdict.get('vcodec') is not None
3454                 and fdict.get('vcodec') != 'none'):
3455             if res:
3456                 res += ', '
3457             res += fdict['vcodec']
3458             if fdict.get('vbr') is not None:
3459                 res += '@'
3460         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3461             res += 'video@'
3462         if fdict.get('vbr') is not None:
3463             res += '%4dk' % fdict['vbr']
3464         if fdict.get('fps') is not None:
3465             if res:
3466                 res += ', '
3467             res += '%sfps' % fdict['fps']
3468         if fdict.get('acodec') is not None:
3469             if res:
3470                 res += ', '
3471             if fdict['acodec'] == 'none':
3472                 res += 'video only'
3473             else:
3474                 res += '%-5s' % fdict['acodec']
3475         elif fdict.get('abr') is not None:
3476             if res:
3477                 res += ', '
3478             res += 'audio'
3479         if fdict.get('abr') is not None:
3480             res += '@%3dk' % fdict['abr']
3481         if fdict.get('asr') is not None:
3482             res += ' (%5dHz)' % fdict['asr']
3483         if fdict.get('filesize') is not None:
3484             if res:
3485                 res += ', '
3486             res += format_bytes(fdict['filesize'])
3487         elif fdict.get('filesize_approx') is not None:
3488             if res:
3489                 res += ', '
3490             res += '~' + format_bytes(fdict['filesize_approx'])
3491         return res
3492
3493     def render_formats_table(self, info_dict):
3494         if not info_dict.get('formats') and not info_dict.get('url'):
3495             return None
3496
3497         formats = info_dict.get('formats', [info_dict])
3498         if not self.params.get('listformats_table', True) is not False:
3499             table = [
3500                 [
3501                     format_field(f, 'format_id'),
3502                     format_field(f, 'ext'),
3503                     self.format_resolution(f),
3504                     self._format_note(f)
3505                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3506             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3507
3508         delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3509         table = [
3510             [
3511                 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3512                 format_field(f, 'ext'),
3513                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3514                 format_field(f, 'fps', '\t%d'),
3515                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3516                 delim,
3517                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3518                 format_field(f, 'tbr', '\t%dk'),
3519                 shorten_protocol_name(f.get('protocol', '')),
3520                 delim,
3521                 format_field(f, 'vcodec', default='unknown').replace(
3522                     'none', 'images' if f.get('acodec') == 'none'
3523                             else self._format_screen('audio only', self.Styles.SUPPRESS)),
3524                 format_field(f, 'vbr', '\t%dk'),
3525                 format_field(f, 'acodec', default='unknown').replace(
3526                     'none', '' if f.get('vcodec') == 'none'
3527                             else self._format_screen('video only', self.Styles.SUPPRESS)),
3528                 format_field(f, 'abr', '\t%dk'),
3529                 format_field(f, 'asr', '\t%dHz'),
3530                 join_nonempty(
3531                     self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3532                     format_field(f, 'language', '[%s]'),
3533                     join_nonempty(format_field(f, 'format_note'),
3534                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3535                                   delim=', '),
3536                     delim=' '),
3537             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3538         header_line = self._list_format_headers(
3539             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3540             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3541
3542         return render_table(
3543             header_line, table, hide_empty=True,
3544             delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3545
3546     def render_thumbnails_table(self, info_dict):
3547         thumbnails = list(info_dict.get('thumbnails') or [])
3548         if not thumbnails:
3549             return None
3550         return render_table(
3551             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3552             [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3553
3554     def render_subtitles_table(self, video_id, subtitles):
3555         def _row(lang, formats):
3556             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3557             if len(set(names)) == 1:
3558                 names = [] if names[0] == 'unknown' else names[:1]
3559             return [lang, ', '.join(names), ', '.join(exts)]
3560
3561         if not subtitles:
3562             return None
3563         return render_table(
3564             self._list_format_headers('Language', 'Name', 'Formats'),
3565             [_row(lang, formats) for lang, formats in subtitles.items()],
3566             hide_empty=True)
3567
3568     def __list_table(self, video_id, name, func, *args):
3569         table = func(*args)
3570         if not table:
3571             self.to_screen(f'{video_id} has no {name}')
3572             return
3573         self.to_screen(f'[info] Available {name} for {video_id}:')
3574         self.to_stdout(table)
3575
3576     def list_formats(self, info_dict):
3577         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3578
3579     def list_thumbnails(self, info_dict):
3580         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3581
3582     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3583         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3584
3585     def urlopen(self, req):
3586         """ Start an HTTP download """
3587         if isinstance(req, compat_basestring):
3588             req = sanitized_Request(req)
3589         return self._opener.open(req, timeout=self._socket_timeout)
3590
3591     def print_debug_header(self):
3592         if not self.params.get('verbose'):
3593             return
3594
3595         def get_encoding(stream):
3596             ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3597             if not supports_terminal_sequences(stream):
3598                 from .compat import WINDOWS_VT_MODE
3599                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3600             return ret
3601
3602         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3603             locale.getpreferredencoding(),
3604             sys.getfilesystemencoding(),
3605             get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
3606             self.get_encoding())
3607
3608         logger = self.params.get('logger')
3609         if logger:
3610             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3611             write_debug(encoding_str)
3612         else:
3613             write_string(f'[debug] {encoding_str}\n', encoding=None)
3614             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3615
3616         source = detect_variant()
3617         write_debug(join_nonempty(
3618             'yt-dlp version', __version__,
3619             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3620             '' if source == 'unknown' else f'({source})',
3621             delim=' '))
3622         if not _LAZY_LOADER:
3623             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3624                 write_debug('Lazy loading extractors is forcibly disabled')
3625             else:
3626                 write_debug('Lazy loading extractors is disabled')
3627         if plugin_extractors or plugin_postprocessors:
3628             write_debug('Plugins: %s' % [
3629                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3630                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3631         if self.params.get('compat_opts'):
3632             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3633
3634         if source == 'source':
3635             try:
3636                 sp = Popen(
3637                     ['git', 'rev-parse', '--short', 'HEAD'],
3638                     stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3639                     cwd=os.path.dirname(os.path.abspath(__file__)))
3640                 out, err = sp.communicate_or_kill()
3641                 out = out.decode().strip()
3642                 if re.match('[0-9a-f]+', out):
3643                     write_debug('Git HEAD: %s' % out)
3644             except Exception:
3645                 try:
3646                     sys.exc_clear()
3647                 except Exception:
3648                     pass
3649
3650         def python_implementation():
3651             impl_name = platform.python_implementation()
3652             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3653                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3654             return impl_name
3655
3656         write_debug('Python version %s (%s %s) - %s' % (
3657             platform.python_version(),
3658             python_implementation(),
3659             platform.architecture()[0],
3660             platform_name()))
3661
3662         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3663         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3664         if ffmpeg_features:
3665             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3666
3667         exe_versions['rtmpdump'] = rtmpdump_version()
3668         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3669         exe_str = ', '.join(
3670             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3671         ) or 'none'
3672         write_debug('exe versions: %s' % exe_str)
3673
3674         from .downloader.websocket import has_websockets
3675         from .postprocessor.embedthumbnail import has_mutagen
3676         from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
3677
3678         lib_str = join_nonempty(
3679             compat_brotli and compat_brotli.__name__,
3680             has_certifi and 'certifi',
3681             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3682             SECRETSTORAGE_AVAILABLE and 'secretstorage',
3683             has_mutagen and 'mutagen',
3684             SQLITE_AVAILABLE and 'sqlite',
3685             has_websockets and 'websockets',
3686             delim=', ') or 'none'
3687         write_debug('Optional libraries: %s' % lib_str)
3688
3689         proxy_map = {}
3690         for handler in self._opener.handlers:
3691             if hasattr(handler, 'proxies'):
3692                 proxy_map.update(handler.proxies)
3693         write_debug(f'Proxy map: {proxy_map}')
3694
3695         # Not implemented
3696         if False and self.params.get('call_home'):
3697             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3698             write_debug('Public IP address: %s' % ipaddr)
3699             latest_version = self.urlopen(
3700                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3701             if version_tuple(latest_version) > version_tuple(__version__):
3702                 self.report_warning(
3703                     'You are using an outdated version (newest version: %s)! '
3704                     'See https://yt-dl.org/update if you need help updating.' %
3705                     latest_version)
3706
    def _setup_opener(self):
        """Build the URL opener used for this instance's HTTP requests.

        Sets `self._socket_timeout` (20 when 'socket_timeout' is unset),
        loads `self.cookiejar` from the cookie file / browser options, and
        stores an opener in `self._opener` that combines the proxy, HTTPS,
        cookie, yt-dlp, redirect, data and (disabled) file handlers.
        """
        timeout_val = self.params.get('socket_timeout')
        self._socket_timeout = 20 if timeout_val is None else float(timeout_val)

        opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty 'proxy' param yields an empty proxy map;
            # any other value is used for both http and https
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # No 'proxy' param: fall back to the proxies reported by getproxies()
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        # 'debug_printtraffic' switches the handlers' debuglevel from 0 to 1
        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            # Replacement for FileHandler.file_open that always refuses the request
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
3754
3755     def encode(self, s):
3756         if isinstance(s, bytes):
3757             return s  # Already encoded
3758
3759         try:
3760             return s.encode(self.get_encoding())
3761         except UnicodeEncodeError as err:
3762             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3763             raise
3764
3765     def get_encoding(self):
3766         encoding = self.params.get('encoding')
3767         if encoding is None:
3768             encoding = preferredencoding()
3769         return encoding
3770
3771     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3772         ''' Write infojson and returns True = written, False = skip, None = error '''
3773         if overwrite is None:
3774             overwrite = self.params.get('overwrites', True)
3775         if not self.params.get('writeinfojson'):
3776             return False
3777         elif not infofn:
3778             self.write_debug(f'Skipping writing {label} infojson')
3779             return False
3780         elif not self._ensure_dir_exists(infofn):
3781             return None
3782         elif not overwrite and os.path.exists(infofn):
3783             self.to_screen(f'[info] {label.title()} metadata is already present')
3784         else:
3785             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3786             try:
3787                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3788             except (OSError, IOError):
3789                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3790                 return None
3791         return True
3792
3793     def _write_description(self, label, ie_result, descfn):
3794         ''' Write description and returns True = written, False = skip, None = error '''
3795         if not self.params.get('writedescription'):
3796             return False
3797         elif not descfn:
3798             self.write_debug(f'Skipping writing {label} description')
3799             return False
3800         elif not self._ensure_dir_exists(descfn):
3801             return None
3802         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3803             self.to_screen(f'[info] {label.title()} description is already present')
3804         elif ie_result.get('description') is None:
3805             self.report_warning(f'There\'s no {label} description to write')
3806             return False
3807         else:
3808             try:
3809                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3810                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3811                     descfile.write(ie_result['description'])
3812             except (OSError, IOError):
3813                 self.report_error(f'Cannot write {label} description file {descfn}')
3814                 return None
3815         return True
3816
3817     def _write_subtitles(self, info_dict, filename):
3818         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3819         ret = []
3820         subtitles = info_dict.get('requested_subtitles')
3821         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3822             # subtitles download errors are already managed as troubles in relevant IE
3823             # that way it will silently go on when used with unsupporting IE
3824             return ret
3825
3826         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3827         if not sub_filename_base:
3828             self.to_screen('[info] Skipping writing video subtitles')
3829             return ret
3830         for sub_lang, sub_info in subtitles.items():
3831             sub_format = sub_info['ext']
3832             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3833             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3834             existing_sub = self.existing_file((sub_filename_final, sub_filename))
3835             if existing_sub:
3836                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3837                 sub_info['filepath'] = existing_sub
3838                 ret.append((existing_sub, sub_filename_final))
3839                 continue
3840
3841             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3842             if sub_info.get('data') is not None:
3843                 try:
3844                     # Use newline='' to prevent conversion of newline characters
3845                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3846                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3847                         subfile.write(sub_info['data'])
3848                     sub_info['filepath'] = sub_filename
3849                     ret.append((sub_filename, sub_filename_final))
3850                     continue
3851                 except (OSError, IOError):
3852                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3853                     return None
3854
3855             try:
3856                 sub_copy = sub_info.copy()
3857                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3858                 self.dl(sub_filename, sub_copy, subtitle=True)
3859                 sub_info['filepath'] = sub_filename
3860                 ret.append((sub_filename, sub_filename_final))
3861             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3862                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3863                     raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
3864                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3865         return ret
3866
3867     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3868         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3869         write_all = self.params.get('write_all_thumbnails', False)
3870         thumbnails, ret = [], []
3871         if write_all or self.params.get('writethumbnail', False):
3872             thumbnails = info_dict.get('thumbnails') or []
3873         multiple = write_all and len(thumbnails) > 1
3874
3875         if thumb_filename_base is None:
3876             thumb_filename_base = filename
3877         if thumbnails and not thumb_filename_base:
3878             self.write_debug(f'Skipping writing {label} thumbnail')
3879             return ret
3880
3881         for idx, t in list(enumerate(thumbnails))[::-1]:
3882             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3883             thumb_display_id = f'{label} thumbnail {t["id"]}'
3884             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3885             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3886
3887             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3888             if existing_thumb:
3889                 self.to_screen('[info] %s is already present' % (
3890                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3891                 t['filepath'] = existing_thumb
3892                 ret.append((existing_thumb, thumb_filename_final))
3893             else:
3894                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3895                 try:
3896                     uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3897                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3898                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3899                         shutil.copyfileobj(uf, thumbf)
3900                     ret.append((thumb_filename, thumb_filename_final))
3901                     t['filepath'] = thumb_filename
3902                 except network_exceptions as err:
3903                     thumbnails.pop(idx)
3904                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3905             if ret and not write_all:
3906                 break
3907         return ret