yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_pycrypto_AES,
  40     compat_shlex_quote,
  41     compat_str,
  42     compat_tokenize_tokenize,
  43     compat_urllib_error,
  44     compat_urllib_request,
  45     compat_urllib_request_DataHandler,
  46     windows_enable_vt_mode,
  47 )
  48 from .cookies import load_cookies
  49 from .utils import (
  50     age_restricted,
  51     args_to_str,
  52     ContentTooShortError,
  53     date_from_str,
  54     DateRange,
  55     DEFAULT_OUTTMPL,
  56     determine_ext,
  57     determine_protocol,
  58     DownloadCancelled,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     format_decimal_suffix,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     get_domain,
  74     HEADRequest,
  75     InAdvancePagedList,
  76     int_or_none,
  77     iri_to_uri,
  78     ISO3166Utils,
  79     join_nonempty,
  80     LazyList,
  81     LINK_TEMPLATES,
  82     locked_file,
  83     make_dir,
  84     make_HTTPS_handler,
  85     MaxDownloadsReached,
  86     merge_headers,
  87     network_exceptions,
  88     number_of_digits,
  89     orderedSet,
  90     OUTTMPL_TYPES,
  91     PagedList,
  92     parse_filesize,
  93     PerRequestProxyHandler,
  94     platform_name,
  95     Popen,
  96     POSTPROCESS_WHEN,
  97     PostProcessingError,
  98     preferredencoding,
  99     prepend_extension,
 100     ReExtractInfo,
 101     register_socks_protocols,
 102     RejectedVideoReached,
 103     remove_terminal_sequences,
 104     render_table,
 105     replace_extension,
 106     SameFileError,
 107     sanitize_filename,
 108     sanitize_path,
 109     sanitize_url,
 110     sanitized_Request,
 111     std_headers,
 112     STR_FORMAT_RE_TMPL,
 113     STR_FORMAT_TYPES,
 114     str_or_none,
 115     strftime_or_none,
 116     subtitles_filename,
 117     supports_terminal_sequences,
 118     timetuple_from_msec,
 119     to_high_limit_path,
 120     traverse_obj,
 121     try_get,
 122     UnavailableVideoError,
 123     url_basename,
 124     variadic,
 125     version_tuple,
 126     write_json_file,
 127     write_string,
 128     YoutubeDLCookieProcessor,
 129     YoutubeDLHandler,
 130     YoutubeDLRedirectHandler,
 131 )
 132 from .cache import Cache
 133 from .minicurses import format_text
 134 from .extractor import (
 135     gen_extractor_classes,
 136     get_info_extractor,
 137     _LAZY_LOADER,
 138     _PLUGIN_CLASSES as plugin_extractors
 139 )
 140 from .extractor.openload import PhantomJSwrapper
 141 from .downloader import (
 142     FFmpegFD,
 143     get_suitable_downloader,
 144     shorten_protocol_name
 145 )
 146 from .downloader.rtmp import rtmpdump_version
 147 from .postprocessor import (
 148     get_postprocessor,
 149     EmbedThumbnailPP,
 150     FFmpegFixupDuplicateMoovPP,
 151     FFmpegFixupDurationPP,
 152     FFmpegFixupM3u8PP,
 153     FFmpegFixupM4aPP,
 154     FFmpegFixupStretchedPP,
 155     FFmpegFixupTimestampPP,
 156     FFmpegMergerPP,
 157     FFmpegPostProcessor,
 158     MoveFilesAfterDownloadPP,
 159     _PLUGIN_CLASSES as plugin_postprocessors
 160 )
 161 from .update import detect_variant
 162 from .version import __version__, RELEASE_GIT_HEAD
 163
 164 if compat_os_name == 'nt':
 165     import ctypes
 166
 167
 168 class YoutubeDL(object):
 169     """YoutubeDL class.
 170
 171     YoutubeDL objects are the ones responsible for downloading the
 172     actual video file and writing it to disk if the user has requested
 173     it, among some other tasks. In most cases there should be one per
 174     program. As, given a video URL, the downloader doesn't know how to
 175     extract all the needed information, task that InfoExtractors do, it
 176     has to pass the URL to one of them.
 177
 178     For this, YoutubeDL objects have a method that allows
 179     InfoExtractors to be registered in a given order. When it is passed
 180     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 181     finds that reports being able to handle it. The InfoExtractor extracts
 182     all the information about the video or videos the URL refers to, and
 183     YoutubeDL processes the extracted information, possibly using a File
 184     Downloader to download the video.
 185
 186     YoutubeDL objects accept a lot of parameters. In order not to saturate
 187     the object constructor with arguments, it receives a dictionary of
 188     options instead. These options are available through the params
 189     attribute for the InfoExtractors to use. The YoutubeDL also
 190     registers itself as the downloader in charge for the InfoExtractors
 191     that are added to it, so this is a "mutual registration".
 192
 193     Available options:
 194
 195     username:          Username for authentication purposes.
 196     password:          Password for authentication purposes.
 197     videopassword:     Password for accessing a video.
 198     ap_mso:            Adobe Pass multiple-system operator identifier.
 199     ap_username:       Multiple-system operator account username.
 200     ap_password:       Multiple-system operator account password.
 201     usenetrc:          Use netrc for authentication instead.
 202     verbose:           Print additional info to stdout.
 203     quiet:             Do not print messages to stdout.
 204     no_warnings:       Do not print out anything for warnings.
 205     forceprint:        A dict with keys WHEN mapped to a list of templates to
 206                        print to stdout. The allowed keys are video or any of the
 207                        items in utils.POSTPROCESS_WHEN.
 208                        For compatibility, a single list is also accepted
 209     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 210                        a list of tuples with (template, filename)
 211     forceurl:          Force printing final URL. (Deprecated)
 212     forcetitle:        Force printing title. (Deprecated)
 213     forceid:           Force printing ID. (Deprecated)
 214     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 215     forcedescription:  Force printing description. (Deprecated)
 216     forcefilename:     Force printing final filename. (Deprecated)
 217     forceduration:     Force printing duration. (Deprecated)
 218     forcejson:         Force printing info_dict as JSON.
 219     dump_single_json:  Force printing the info_dict of the whole playlist
 220                        (or video) as a single JSON line.
 221     force_write_download_archive: Force writing download archive regardless
 222                        of 'skip_download' or 'simulate'.
 223     simulate:          Do not download the video files. If unset (or None),
 224                        simulate only if listsubtitles, listformats or list_thumbnails is used
 225     format:            Video format code. see "FORMAT SELECTION" for more details.
 226                        You can also pass a function. The function takes 'ctx' as
 227                        argument and returns the formats to download.
 228                        See "build_format_selector" for an implementation
 229     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 230     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 231                        extracting metadata even if the video is not actually
 232                        available for download (experimental)
 233     format_sort:       A list of fields by which to sort the video formats.
 234                        See "Sorting Formats" for more details.
 235     format_sort_force: Force the given format_sort. see "Sorting Formats"
 236                        for more details.
 237     allow_multiple_video_streams:   Allow multiple video streams to be merged
 238                        into a single file
 239     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 240                        into a single file
 241     check_formats:     Whether to test if the formats are downloadable.
 242                        Can be True (check all), False (check none),
 243                        'selected' (check selected formats),
 244                        or None (check only if requested by extractor)
 245     paths:             Dictionary of output paths. The allowed keys are 'home'
 246                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 247     outtmpl:           Dictionary of templates for output names. Allowed keys
 248                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 249                        For compatibility with youtube-dl, a single string can also be used
 250     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 251     restrictfilenames: Do not allow "&" and spaces in file names
 252     trim_file_name:    Limit length of filename (extension excluded)
 253     windowsfilenames:  Force the filenames to be windows compatible
 254     ignoreerrors:      Do not stop on download/postprocessing errors.
 255                        Can be 'only_download' to ignore only download errors.
 256                        Default is 'only_download' for CLI, but False for API
 257     skip_playlist_after_errors: Number of allowed failures until the rest of
 258                        the playlist is skipped
 259     force_generic_extractor: Force downloader to use the generic extractor
 260     overwrites:        Overwrite all video and metadata files if True,
 261                        overwrite only non-video files if None
 262                        and don't overwrite any file if False
 263                        For compatibility with youtube-dl,
 264                        "nooverwrites" may also be used instead
 265     playliststart:     Playlist item to start at.
 266     playlistend:       Playlist item to end at.
 267     playlist_items:    Specific indices of playlist to download.
 268     playlistreverse:   Download playlist items in reverse order.
 269     playlistrandom:    Download playlist items in random order.
 270     matchtitle:        Download only matching titles.
 271     rejecttitle:       Reject downloads for matching titles.
 272     logger:            Log messages to a logging.Logger instance.
 273     logtostderr:       Log messages to stderr instead of stdout.
 274     consoletitle:       Display progress in console window's titlebar.
 275     writedescription:  Write the video description to a .description file
 276     writeinfojson:     Write the video metadata to a .info.json file
 277     clean_infojson:    Remove private fields from the infojson
 278     getcomments:       Extract video comments. This will not be written to disk
 279                        unless writeinfojson is also given
 280     writeannotations:  Write the video annotations to a .annotations.xml file
 281     writethumbnail:    Write the thumbnail image to a file
 282     allow_playlist_files: Whether to write playlists' description, infojson etc
 283                        also to disk when using the 'write*' options
 284     write_all_thumbnails:  Write all thumbnail formats to files
 285     writelink:         Write an internet shortcut file, depending on the
 286                        current platform (.url/.webloc/.desktop)
 287     writeurllink:      Write a Windows internet shortcut file (.url)
 288     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 289     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 290     writesubtitles:    Write the video subtitles to a file
 291     writeautomaticsub: Write the automatically generated subtitles to a file
 292     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 293                        Downloads all the subtitles of the video
 294                        (requires writesubtitles or writeautomaticsub)
 295     listsubtitles:     Lists all available subtitles for the video
 296     subtitlesformat:   The format code for subtitles
 297     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 298                        The list may contain "all" to refer to all the available
 299                        subtitles. The language can be prefixed with a "-" to
 300                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 301     keepvideo:         Keep the video file after post-processing
 302     daterange:         A DateRange object, download only if the upload_date is in the range.
 303     skip_download:     Skip the actual download of the video file
 304     cachedir:          Location of the cache files in the filesystem.
 305                        False to disable filesystem cache.
 306     noplaylist:        Download single video instead of a playlist if in doubt.
 307     age_limit:         An integer representing the user's age in years.
 308                        Unsuitable videos for the given age are skipped.
 309     min_views:         An integer representing the minimum view count the video
 310                        must have in order to not be skipped.
 311                        Videos without view count information are always
 312                        downloaded. None for no limit.
 313     max_views:         An integer representing the maximum view count.
 314                        Videos that are more popular than that are not
 315                        downloaded.
 316                        Videos without view count information are always
 317                        downloaded. None for no limit.
 318     download_archive:  File name of a file where all downloads are recorded.
 319                        Videos already present in the file are not downloaded
 320                        again.
 321     break_on_existing: Stop the download process after attempting to download a
 322                        file that is in the archive.
 323     break_on_reject:   Stop the download process when encountering a video that
 324                        has been filtered out.
 325     break_per_url:     Whether break_on_reject and break_on_existing
 326                        should act on each input URL as opposed to for the entire queue
 327     cookiefile:        File name where cookies should be read from and dumped to
 328     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 329                        name/path from where cookies are loaded, and the name of the
 330                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 331     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 332                        support RFC 5746 secure renegotiation
 333     nocheckcertificate:  Do not verify SSL certificates
 334     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 335                        At the moment, this is only supported by YouTube.
 336     http_headers:      A dictionary of custom headers to be used for all requests
 337     proxy:             URL of the proxy server to use
 338     geo_verification_proxy:  URL of the proxy to use for IP address verification
 339                        on geo-restricted sites.
 340     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 341     bidi_workaround:   Work around buggy terminals without bidirectional text
 342                        support, using fribidi
 343     debug_printtraffic:Print out sent and received HTTP traffic
 344     include_ads:       Download ads as well (deprecated)
 345     default_search:    Prepend this string if an input url is not valid.
 346                        'auto' for elaborate guessing
 347     encoding:          Use this encoding instead of the system-specified.
 348     extract_flat:      Do not resolve URLs, return the immediate result.
 349                        Pass in 'in_playlist' to only show this behavior for
 350                        playlist items.
 351     wait_for_video:    If given, wait for scheduled streams to become available.
 352                        The value should be a tuple containing the range
 353                        (min_secs, max_secs) to wait between retries
 354     postprocessors:    A list of dictionaries, each with an entry
 355                        * key:  The name of the postprocessor. See
 356                                yt_dlp/postprocessor/__init__.py for a list.
 357                        * when: When to run the postprocessor. Allowed values are
 358                                the entries of utils.POSTPROCESS_WHEN
 359                                Assumed to be 'post_process' if not given
 360     post_hooks:        Deprecated - Register a custom postprocessor instead
 361                        A list of functions that get called as the final step
 362                        for each video file, after all postprocessors have been
 363                        called. The filename will be passed as the only argument.
 364     progress_hooks:    A list of functions that get called on download
 365                        progress, with a dictionary with the entries
 366                        * status: One of "downloading", "error", or "finished".
 367                                  Check this first and ignore unknown values.
 368                        * info_dict: The extracted info_dict
 369
 370                        If status is one of "downloading", or "finished", the
 371                        following properties may also be present:
 372                        * filename: The final filename (always present)
 373                        * tmpfilename: The filename we're currently writing to
 374                        * downloaded_bytes: Bytes on disk
 375                        * total_bytes: Size of the whole file, None if unknown
 376                        * total_bytes_estimate: Guess of the eventual file size,
 377                                                None if unavailable.
 378                        * elapsed: The number of seconds since download started.
 379                        * eta: The estimated time in seconds, None if unknown
 380                        * speed: The download speed in bytes/second, None if
 381                                 unknown
 382                        * fragment_index: The counter of the currently
 383                                          downloaded video fragment.
 384                        * fragment_count: The number of fragments (= individual
 385                                          files that will be merged)
 386
 387                        Progress hooks are guaranteed to be called at least once
 388                        (with status "finished") if the download is successful.
 389     postprocessor_hooks:  A list of functions that get called on postprocessing
 390                        progress, with a dictionary with the entries
 391                        * status: One of "started", "processing", or "finished".
 392                                  Check this first and ignore unknown values.
 393                        * postprocessor: Name of the postprocessor
 394                        * info_dict: The extracted info_dict
 395
 396                        Progress hooks are guaranteed to be called at least twice
 397                        (with status "started" and "finished") if the processing is successful.
 398     merge_output_format: Extension to use when merging formats.
 399     final_ext:         Expected final extension; used to detect when the file was
 400                        already downloaded and converted
 401     fixup:             Automatically correct known faults of the file.
 402                        One of:
 403                        - "never": do nothing
 404                        - "warn": only emit a warning
 405                        - "detect_or_warn": check whether we can do anything
 406                                            about it, warn otherwise (default)
 407     source_address:    Client-side IP address to bind to.
 408     call_home:         Boolean, true iff we are allowed to contact the
 409                        yt-dlp servers for debugging. (BROKEN)
 410     sleep_interval_requests: Number of seconds to sleep between requests
 411                        during extraction
 412     sleep_interval:    Number of seconds to sleep before each download when
 413                        used alone or a lower bound of a range for randomized
 414                        sleep before each download (minimum possible number
 415                        of seconds to sleep) when used along with
 416                        max_sleep_interval.
 417     max_sleep_interval:Upper bound of a range for randomized sleep before each
 418                        download (maximum possible number of seconds to sleep).
 419                        Must only be used along with sleep_interval.
 420                        Actual sleep time will be a random float from range
 421                        [sleep_interval; max_sleep_interval].
 422     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 423     listformats:       Print an overview of available video formats and exit.
 424     list_thumbnails:   Print a table of all thumbnails and exit.
 425     match_filter:      A function that gets called with the info_dict of
 426                        every video.
 427                        If it returns a message, the video is ignored.
 428                        If it returns None, the video is downloaded.
 429                        match_filter_func in utils.py is one example for this.
 430     no_color:          Do not emit color codes in output.
 431     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 432                        HTTP header
 433     geo_bypass_country:
 434                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 435                        explicit geographic restriction bypassing via faking
 436                        X-Forwarded-For HTTP header
 437     geo_bypass_ip_block:
 438                        IP range in CIDR notation that will be used similarly to
 439                        geo_bypass_country
 440
 441     The following options determine which downloader is picked:
 442     external_downloader: A dictionary of protocol keys and the executable of the
 443                        external downloader to use for it. The allowed protocols
 444                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 445                        Set the value to 'native' to use the native downloader
 446     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 447                        or {'m3u8': 'ffmpeg'} instead.
 448                        Use the native HLS downloader instead of ffmpeg/avconv
 449                        if True, otherwise use ffmpeg/avconv if False, otherwise
 450                        use downloader suggested by extractor if None.
 451     compat_opts:       Compatibility options. See "Differences in default behavior".
 452                        The following options do not work when used through the API:
 453                        filename, abort-on-error, multistreams, no-live-chat, format-sort
 454                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 455                        Refer __init__.py for their implementation
 456     progress_template: Dictionary of templates for progress outputs.
 457                        Allowed keys are 'download', 'postprocess',
 458                        'download-title' (console title) and 'postprocess-title'.
 459                        The template is mapped on a dictionary with keys 'progress' and 'info'
 460
 461     The following parameters are not used by YoutubeDL itself, they are used by
 462     the downloader (see yt_dlp/downloader/common.py):
 463     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 464     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 465     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 466     external_downloader_args, concurrent_fragment_downloads.
 467
 468     The following options are used by the post processors:
 469     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 470                        otherwise prefer ffmpeg. (avconv support is deprecated)
 471     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 472                        to the binary or its containing directory.
 473     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 474                        and a list of additional command-line arguments for the
 475                        postprocessor/executable. The dict can also have "PP+EXE" keys
 476                        which are used when the given exe is used by the given PP.
 477                        Use 'default' as the name for arguments to be passed to all PP
 478                        For compatibility with youtube-dl, a single list of args
 479                        can also be used
 480
 481     The following options are used by the extractors:
 482     extractor_retries: Number of times to retry for known errors
 483     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 484     hls_split_discontinuity: Split HLS playlists to different formats at
 485                        discontinuities such as ad breaks (default: False)
 486     extractor_args:    A dictionary of arguments to be passed to the extractors.
 487                        See "EXTRACTOR ARGUMENTS" for details.
 488                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 489     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 490     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 491                        If True (default), DASH manifests and related
 492                        data will be downloaded and processed by extractor.
 493                        You can reduce network I/O by disabling it if you don't
 494                        care about DASH. (only for youtube)
 495     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 496                        If True (default), HLS manifests and related
 497                        data will be downloaded and processed by extractor.
 498                        You can reduce network I/O by disabling it if you don't
 499                        care about HLS. (only for youtube)
 500     """
 501
 502     _NUMERIC_FIELDS = set((  # field names presumably treated as numeric; the consumer is not visible in this chunk - TODO confirm
 503         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 504         'timestamp', 'release_timestamp',
 505         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 506         'average_rating', 'comment_count', 'age_limit',
 507         'start_time', 'end_time',
 508         'chapter_number', 'season_number', 'episode_number',
 509         'track_number', 'disc_number', 'release_year',
 510     ))
 511
 512     _format_selection_exts = {  # file extensions grouped by media kind; presumably used by format selection - verify against its users
 513         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 514         'video': {'mp4', 'flv', 'webm', '3gp'},
 515         'storyboards': {'mhtml'},
 516     }
 517
 518     params = None  # class-level placeholder; __init__ binds the options dict passed by the caller
 519     _ies = {}  # class-level default; __init__ rebinds a fresh dict per instance
 520     _pps = {k: [] for k in POSTPROCESS_WHEN}  # one empty list per POSTPROCESS_WHEN key; rebuilt per instance in __init__
 521     _printed_messages = set()  # class-level default; __init__ rebinds a fresh set per instance
 522     _first_webpage_request = True  # also set to True per instance in __init__
 523     _download_retcode = None  # __init__ sets this to 0
 524     _num_downloads = None  # __init__ sets this to 0
 525     _playlist_level = 0  # not reassigned in the visible part of __init__
 526     _playlist_urls = set()  # NOTE(review): mutable class-level set, not rebound in the visible part of __init__ - confirm it is not shared across instances
 527     _screen_file = None  # __init__ picks sys.stdout or sys.stderr based on params['logtostderr']
 528
 529     def __init__(self, params=None, auto_init=True):
 530         """Create a YoutubeDL object with the given options.
 531         @param auto_init    Whether to load the default extractors and print header (if verbose).
 532                             Set to 'no_verbose_header' to not print the header
 533         """
 534         if params is None:
 535             params = {}
 536         self._ies = {}
 537         self._ies_instances = {}
 538         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 539         self._printed_messages = set()
 540         self._first_webpage_request = True
 541         self._post_hooks = []
 542         self._progress_hooks = []
 543         self._postprocessor_hooks = []
 544         self._download_retcode = 0
 545         self._num_downloads = 0
 546         self._num_videos = 0
 547         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 548         self._err_file = sys.stderr
 549         self.params = params
 550         self.cache = Cache(self)
 551
 552         windows_enable_vt_mode()
 553         self._allow_colors = {
 554             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 555             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 556         }
 557
 558         if sys.version_info < (3, 6):
 559             self.report_warning(
 560                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 561
 562         if self.params.get('allow_unplayable_formats'):
 563             self.report_warning(
 564                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 565                 'This is a developer option intended for debugging. \n'
 566                 '         If you experience any issues while using this option, '
 567                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 568
 569         def check_deprecated(param, option, suggestion):
 570             if self.params.get(param) is not None:
 571                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 572                 return True
 573             return False
 574
 575         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 576             if self.params.get('geo_verification_proxy') is None:
 577                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 578
 579         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 580         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 581         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 582
 583         for msg in self.params.get('_warnings', []):
 584             self.report_warning(msg)
 585         for msg in self.params.get('_deprecation_warnings', []):
 586             self.deprecation_warning(msg)
 587
 588         if 'list-formats' in self.params.get('compat_opts', []):
 589             self.params['listformats_table'] = False
 590
 591         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 592             # nooverwrites was unnecessarily changed to overwrites
 593             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 594             # This ensures compatibility with both keys
 595             self.params['overwrites'] = not self.params['nooverwrites']
 596         elif self.params.get('overwrites') is None:
 597             self.params.pop('overwrites', None)
 598         else:
 599             self.params['nooverwrites'] = not self.params['overwrites']
 600
 601         self.params.setdefault('forceprint', {})
 602         self.params.setdefault('print_to_file', {})
 603
 604         # Compatibility with older syntax
 605         if not isinstance(params['forceprint'], dict):
 606             self.params['forceprint'] = {'video': params['forceprint']}
 607
 608         if self.params.get('bidi_workaround', False):
 609             try:
 610                 import pty
 611                 master, slave = pty.openpty()
 612                 width = compat_get_terminal_size().columns
 613                 if width is None:
 614                     width_args = []
 615                 else:
 616                     width_args = ['-w', str(width)]
 617                 sp_kwargs = dict(
 618                     stdin=subprocess.PIPE,
 619                     stdout=slave,
 620                     stderr=self._err_file)
 621                 try:
 622                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 623                 except OSError:
 624                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 625                 self._output_channel = os.fdopen(master, 'rb')
 626             except OSError as ose:
 627                 if ose.errno == errno.ENOENT:
 628                     self.report_warning(
 629                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 630                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 631                 else:
 632                     raise
 633
 634         if (sys.platform != 'win32'
 635                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 636                 and not self.params.get('restrictfilenames', False)):
 637             # Unicode filesystem API will throw errors (#1474, #13027)
 638             self.report_warning(
 639                 'Assuming --restrict-filenames since file system encoding '
 640                 'cannot encode all characters. '
 641                 'Set the LC_ALL environment variable to fix this.')
 642             self.params['restrictfilenames'] = True
 643
 644         self.outtmpl_dict = self.parse_outtmpl()
 645
 646         # Creating format selector here allows us to catch syntax errors before the extraction
 647         self.format_selector = (
 648             self.params.get('format') if self.params.get('format') in (None, '-')
 649             else self.params['format'] if callable(self.params['format'])
 650             else self.build_format_selector(self.params['format']))
 651
 652         # Set http_headers defaults according to std_headers
 653         self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
 654
 655         self._setup_opener()
 656
 657         if auto_init:
 658             if auto_init != 'no_verbose_header':
 659                 self.print_debug_header()
 660             self.add_default_info_extractors()
 661
 662         hooks = {
 663             'post_hooks': self.add_post_hook,
 664             'progress_hooks': self.add_progress_hook,
 665             'postprocessor_hooks': self.add_postprocessor_hook,
 666         }
 667         for opt, fn in hooks.items():
 668             for ph in self.params.get(opt, []):
 669                 fn(ph)
 670
 671         for pp_def_raw in self.params.get('postprocessors', []):
 672             pp_def = dict(pp_def_raw)
 673             when = pp_def.pop('when', 'post_process')
 674             self.add_post_processor(
 675                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 676                 when=when)
 677
 678         register_socks_protocols()
 679
 680         def preload_download_archive(fn):
 681             """Preload the archive, if any is specified"""
 682             if fn is None:
 683                 return False
 684             self.write_debug(f'Loading archive file {fn!r}')
 685             try:
 686                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 687                     for line in archive_file:
 688                         self.archive.add(line.strip())
 689             except IOError as ioe:
 690                 if ioe.errno != errno.ENOENT:
 691                     raise
 692                 return False
 693             return True
 694
 695         self.archive = set()
 696         preload_download_archive(self.params.get('download_archive'))
 697
 698     def warn_if_short_id(self, argv):
 699         # short YouTube ID starting with dash?
 700         idxs = [
 701             i for i, a in enumerate(argv)
 702             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 703         if idxs:
 704             correct_argv = (
 705                 ['yt-dlp']
 706                 + [a for i, a in enumerate(argv) if i not in idxs]
 707                 + ['--'] + [argv[i] for i in idxs]
 708             )
 709             self.report_warning(
 710                 'Long argument string detected. '
 711                 'Use -- to separate parameters and URLs, like this:\n%s' %
 712                 args_to_str(correct_argv))
 713
 714     def add_info_extractor(self, ie):
 715         """Add an InfoExtractor object to the end of the list."""
 716         ie_key = ie.ie_key()
 717         self._ies[ie_key] = ie
 718         if not isinstance(ie, type):
 719             self._ies_instances[ie_key] = ie
 720             ie.set_downloader(self)
 721
 722     def _get_info_extractor_class(self, ie_key):
 723         ie = self._ies.get(ie_key)
 724         if ie is None:
 725             ie = get_info_extractor(ie_key)
 726             self.add_info_extractor(ie)
 727         return ie
 728
 729     def get_info_extractor(self, ie_key):
 730         """
 731         Get an instance of an IE with name ie_key, it will try to get one from
 732         the _ies list, if there's no instance it will create a new one and add
 733         it to the extractor list.
 734         """
 735         ie = self._ies_instances.get(ie_key)
 736         if ie is None:
 737             ie = get_info_extractor(ie_key)()
 738             self.add_info_extractor(ie)
 739         return ie
 740
 741     def add_default_info_extractors(self):
 742         """
 743         Add the InfoExtractors returned by gen_extractors to the end of the list
 744         """
 745         for ie in gen_extractor_classes():
 746             self.add_info_extractor(ie)
 747
 748     def add_post_processor(self, pp, when='post_process'):
 749         """Add a PostProcessor object to the end of the chain."""
 750         self._pps[when].append(pp)
 751         pp.set_downloader(self)
 752
 753     def add_post_hook(self, ph):
 754         """Add the post hook"""
 755         self._post_hooks.append(ph)
 756
 757     def add_progress_hook(self, ph):
 758         """Add the download progress hook"""
 759         self._progress_hooks.append(ph)
 760
 761     def add_postprocessor_hook(self, ph):
 762         """Add the postprocessing progress hook"""
 763         self._postprocessor_hooks.append(ph)
 764         for pps in self._pps.values():
 765             for pp in pps:
 766                 pp.add_progress_hook(ph)
 767
 768     def _bidi_workaround(self, message):
 769         if not hasattr(self, '_output_channel'):
 770             return message
 771
 772         assert hasattr(self, '_output_process')
 773         assert isinstance(message, compat_str)
 774         line_count = message.count('\n') + 1
 775         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 776         self._output_process.stdin.flush()
 777         res = ''.join(self._output_channel.readline().decode('utf-8')
 778                       for _ in range(line_count))
 779         return res[:-len('\n')]
 780
 781     def _write_string(self, message, out=None, only_once=False):
 782         if only_once:
 783             if message in self._printed_messages:
 784                 return
 785             self._printed_messages.add(message)
 786         write_string(message, out=out, encoding=self.params.get('encoding'))
 787
 788     def to_stdout(self, message, skip_eol=False, quiet=False):
 789         """Print message to stdout"""
 790         if self.params.get('logger'):
 791             self.params['logger'].debug(message)
 792         elif not quiet or self.params.get('verbose'):
 793             self._write_string(
 794                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 795                 self._err_file if quiet else self._screen_file)
 796
 797     def to_stderr(self, message, only_once=False):
 798         """Print message to stderr"""
 799         assert isinstance(message, compat_str)
 800         if self.params.get('logger'):
 801             self.params['logger'].error(message)
 802         else:
 803             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 804
 805     def to_console_title(self, message):
 806         if not self.params.get('consoletitle', False):
 807             return
 808         message = remove_terminal_sequences(message)
 809         if compat_os_name == 'nt':
 810             if ctypes.windll.kernel32.GetConsoleWindow():
 811                 # c_wchar_p() might not be necessary if `message` is
 812                 # already of type unicode()
 813                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 814         elif 'TERM' in os.environ:
 815             self._write_string('\033]0;%s\007' % message, self._screen_file)
 816
 817     def save_console_title(self):
 818         if not self.params.get('consoletitle', False):
 819             return
 820         if self.params.get('simulate'):
 821             return
 822         if compat_os_name != 'nt' and 'TERM' in os.environ:
 823             # Save the title on stack
 824             self._write_string('\033[22;0t', self._screen_file)
 825
 826     def restore_console_title(self):
 827         if not self.params.get('consoletitle', False):
 828             return
 829         if self.params.get('simulate'):
 830             return
 831         if compat_os_name != 'nt' and 'TERM' in os.environ:
 832             # Restore the title from stack
 833             self._write_string('\033[23;0t', self._screen_file)
 834
 835     def __enter__(self):
 836         self.save_console_title()
 837         return self
 838
 839     def __exit__(self, *args):
 840         self.restore_console_title()
 841
 842         if self.params.get('cookiefile') is not None:
 843             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 844
 845     def trouble(self, message=None, tb=None, is_error=True):
 846         """Determine action to take when a download problem appears.
 847
 848         Depending on if the downloader has been configured to ignore
 849         download errors or not, this method may throw an exception or
 850         not when errors are found, after printing the message.
 851
 852         @param tb          If given, is additional traceback information
 853         @param is_error    Whether to raise error according to ignorerrors
 854         """
 855         if message is not None:
 856             self.to_stderr(message)
 857         if self.params.get('verbose'):
 858             if tb is None:
 859                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 860                     tb = ''
 861                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 862                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 863                     tb += encode_compat_str(traceback.format_exc())
 864                 else:
 865                     tb_data = traceback.format_list(traceback.extract_stack())
 866                     tb = ''.join(tb_data)
 867             if tb:
 868                 self.to_stderr(tb)
 869         if not is_error:
 870             return
 871         if not self.params.get('ignoreerrors'):
 872             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 873                 exc_info = sys.exc_info()[1].exc_info
 874             else:
 875                 exc_info = sys.exc_info()
 876             raise DownloadError(message, exc_info)
 877         self._download_retcode = 1
 878
 879     def to_screen(self, message, skip_eol=False):
 880         """Print message to stdout if not in quiet mode"""
 881         self.to_stdout(
 882             message, skip_eol, quiet=self.params.get('quiet', False))
 883
    class Styles(Enum):
        # Named text styles; each member's value is the style string that
        # _format_text unwraps (via .value) before calling format_text.
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        # Same value as HEADERS, so Enum makes WARNING an alias of HEADERS
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 892
 893     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 894         if test_encoding:
 895             original_text = text
 896             # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
 897             encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
 898             text = text.encode(encoding, 'ignore').decode(encoding)
 899             if fallback is not None and text != original_text:
 900                 text = fallback
 901         if isinstance(f, self.Styles):
 902             f = f.value
 903         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 904
 905     def _format_screen(self, *args, **kwargs):
 906         return self._format_text(
 907             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 908
 909     def _format_err(self, *args, **kwargs):
 910         return self._format_text(
 911             self._err_file, self._allow_colors['err'], *args, **kwargs)
 912
 913     def report_warning(self, message, only_once=False):
 914         '''
 915         Print the message to stderr, it will be prefixed with 'WARNING:'
 916         If stderr is a tty file the 'WARNING:' will be colored
 917         '''
 918         if self.params.get('logger') is not None:
 919             self.params['logger'].warning(message)
 920         else:
 921             if self.params.get('no_warnings'):
 922                 return
 923             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 924
 925     def deprecation_warning(self, message):
 926         if self.params.get('logger') is not None:
 927             self.params['logger'].warning('DeprecationWarning: {message}')
 928         else:
 929             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 930
 931     def report_error(self, message, *args, **kwargs):
 932         '''
 933         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 934         in red if stderr is a tty file.
 935         '''
 936         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 937
 938     def write_debug(self, message, only_once=False):
 939         '''Log debug message or Print message to stderr'''
 940         if not self.params.get('verbose', False):
 941             return
 942         message = '[debug] %s' % message
 943         if self.params.get('logger'):
 944             self.params['logger'].debug(message)
 945         else:
 946             self.to_stderr(message, only_once)
 947
 948     def report_file_already_downloaded(self, file_name):
 949         """Report file has already been fully downloaded."""
 950         try:
 951             self.to_screen('[download] %s has already been downloaded' % file_name)
 952         except UnicodeEncodeError:
 953             self.to_screen('[download] The file has already been downloaded')
 954
 955     def report_file_delete(self, file_name):
 956         """Report that existing file will be deleted."""
 957         try:
 958             self.to_screen('Deleting existing file %s' % file_name)
 959         except UnicodeEncodeError:
 960             self.to_screen('Deleting existing file')
 961
 962     def raise_no_formats(self, info, forced=False, *, msg=None):
 963         has_drm = info.get('__has_drm')
 964         ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
 965         msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
 966         if forced or not ignored:
 967             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 968                                  expected=has_drm or ignored or expected)
 969         else:
 970             self.report_warning(msg)
 971
 972     def parse_outtmpl(self):
 973         outtmpl_dict = self.params.get('outtmpl', {})
 974         if not isinstance(outtmpl_dict, dict):
 975             outtmpl_dict = {'default': outtmpl_dict}
 976         # Remove spaces in the default template
 977         if self.params.get('restrictfilenames'):
 978             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 979         else:
 980             sanitize = lambda x: x
 981         outtmpl_dict.update({
 982             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 983             if outtmpl_dict.get(k) is None})
 984         for key, val in outtmpl_dict.items():
 985             if isinstance(val, bytes):
 986                 self.report_warning(
 987                     'Parameter outtmpl is bytes, but should be a unicode string. '
 988                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 989         return outtmpl_dict
 990
 991     def get_output_path(self, dir_type='', filename=None):
 992         paths = self.params.get('paths', {})
 993         assert isinstance(paths, dict)
 994         path = os.path.join(
 995             expand_path(paths.get('home', '').strip()),
 996             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 997             filename or '')
 998
 999         # Temporary fix for #4787
1000         # 'Treat' all problem characters by passing filename through preferredencoding
1001         # to workaround encoding issues with subprocess on python2 @ Windows
1002         if sys.version_info < (3, 0) and sys.platform == 'win32':
1003             path = encodeFilename(path, True).decode(preferredencoding())
1004         return sanitize_path(path, force=self.params.get('windowsfilenames'))
1005
1006     @staticmethod
1007     def _outtmpl_expandpath(outtmpl):
1008         # expand_path translates '%%' into '%' and '$$' into '$'
1009         # correspondingly that is not what we want since we need to keep
1010         # '%%' intact for template dict substitution step. Working around
1011         # with boundary-alike separator hack.
1012         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1013         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
1014
1015         # outtmpl should be expand_path'ed before template dict substitution
1016         # because meta fields may contain env variables we don't want to
1017         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1018         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1019         return expand_path(outtmpl).replace(sep, '')
1020
1021     @staticmethod
1022     def escape_outtmpl(outtmpl):
1023         ''' Escape any remaining strings like %s, %abc% etc. '''
1024         return re.sub(
1025             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1026             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1027             outtmpl)
1028
1029     @classmethod
1030     def validate_outtmpl(cls, outtmpl):
1031         ''' @return None or Exception object '''
1032         outtmpl = re.sub(
1033             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1034             lambda mobj: f'{mobj.group(0)[:-1]}s',
1035             cls._outtmpl_expandpath(outtmpl))
1036         try:
1037             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1038             return None
1039         except ValueError as err:
1040             return err
1041
1042     @staticmethod
1043     def _copy_infodict(info_dict):
1044         info_dict = dict(info_dict)
1045         info_dict.pop('__postprocessors', None)
1046         return info_dict
1047
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

        Every field in outtmpl matched by EXTERNAL_FORMAT_RE is evaluated
        against a copy of info_dict and rewritten by create_key to reference
        a generated key.

        @param outtmpl     The output template string
        @param info_dict   The info dict to take the values from. 'epoch' is
                           set on this dict if it is missing; all other
                           additions are made to a copy only
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        @return            A tuple (rewritten template, dict mapping each
                           generated key to its computed value)
        """

        # Done on the caller's dict (before copying) so that later calls see the same value
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Filled by create_key: generated template key -> computed value
        TMPL_DICT = {}
        # Matches the template fields to rewrite; besides the types in STR_FORMAT_TYPES
        # the conversion characters l, j, q, B, U, D and S are accepted
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        # Arithmetic is carried out on floats (see get_value)
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Parses the text between the parentheses of a template field into its parts:
        # negation, field path, maths, strftime format, alternate field, replacement and default
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Look up a dotted path in (the copy of) info_dict;
            # a leading '.' yields an empty first key, which is dropped
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Compute the value for one parsed field (mdict is the groupdict of
            # an INTERNAL_FORMAT_RE match)
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Consume the maths string piece by piece, alternating between
                # an operator and an operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is either a number or the name of another field
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        # e.g. the value or the operand could not be converted to a number
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder used when a field has no value and no explicit default
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted,
                                     is_id=re.search(r'(^|[_.])id(\.|$)', key))

        # A callable `sanitize` is used as the sanitizer itself;
        # from here on `sanitize` is only a flag
        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            # json.dumps fallback: sets and LazyLists become lists, anything else its repr
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            # Replacement callback for EXTERNAL_FORMAT_RE.sub: computes the value of one
            # template field, stores it in TMPL_DICT and returns the rewritten field
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            # Try the field and then its alternates until one of them yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            # Missing value -> default; otherwise the replacement (if any) takes the value's place
            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            # The same format with its conversion character replaced by 's'
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                # Format as UTF-8 bytes, then decode again ignoring undecodable bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            # Build the generated key from the original key ('%' followed by a NUL)
            # and the original format, joined by a NUL
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1221
1222     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1223         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1224         return self.escape_outtmpl(outtmpl) % info_dict
1225
1226     def _prepare_filename(self, info_dict, tmpl_type='default'):
1227         try:
1228             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1229             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1230             if not filename:
1231                 return None
1232
1233             if tmpl_type in ('default', 'temp'):
1234                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1235                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1236                     filename = replace_extension(filename, ext, final_ext)
1237             else:
1238                 force_ext = OUTTMPL_TYPES[tmpl_type]
1239                 if force_ext:
1240                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1241
1242             # https://github.com/blackjack4494/youtube-dlc/issues/85
1243             trim_file_name = self.params.get('trim_file_name', False)
1244             if trim_file_name:
1245                 no_ext, *ext = filename.rsplit('.', 2)
1246                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1247
1248             return filename
1249         except ValueError as err:
1250             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1251             return None
1252
1253     def prepare_filename(self, info_dict, dir_type='', warn=False):
1254         """Generate the output filename."""
1255
1256         filename = self._prepare_filename(info_dict, dir_type or 'default')
1257         if not filename and dir_type not in ('', 'temp'):
1258             return ''
1259
1260         if warn:
1261             if not self.params.get('paths'):
1262                 pass
1263             elif filename == '-':
1264                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1265             elif os.path.isabs(filename):
1266                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1267         if filename == '-' or not filename:
1268             return filename
1269
1270         return self.get_output_path(dir_type, filename)
1271
1272     def _match_entry(self, info_dict, incomplete=False, silent=False):
1273         """ Returns None if the file should be downloaded """
1274
1275         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1276
1277         def check_filter():
1278             if 'title' in info_dict:
1279                 # This can happen when we're just evaluating the playlist
1280                 title = info_dict['title']
1281                 matchtitle = self.params.get('matchtitle', False)
1282                 if matchtitle:
1283                     if not re.search(matchtitle, title, re.IGNORECASE):
1284                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1285                 rejecttitle = self.params.get('rejecttitle', False)
1286                 if rejecttitle:
1287                     if re.search(rejecttitle, title, re.IGNORECASE):
1288                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1289             date = info_dict.get('upload_date')
1290             if date is not None:
1291                 dateRange = self.params.get('daterange', DateRange())
1292                 if date not in dateRange:
1293                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1294             view_count = info_dict.get('view_count')
1295             if view_count is not None:
1296                 min_views = self.params.get('min_views')
1297                 if min_views is not None and view_count < min_views:
1298                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1299                 max_views = self.params.get('max_views')
1300                 if max_views is not None and view_count > max_views:
1301                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1302             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1303                 return 'Skipping "%s" because it is age restricted' % video_title
1304
1305             match_filter = self.params.get('match_filter')
1306             if match_filter is not None:
1307                 try:
1308                     ret = match_filter(info_dict, incomplete=incomplete)
1309                 except TypeError:
1310                     # For backward compatibility
1311                     ret = None if incomplete else match_filter(info_dict)
1312                 if ret is not None:
1313                     return ret
1314             return None
1315
1316         if self.in_download_archive(info_dict):
1317             reason = '%s has already been recorded in the archive' % video_title
1318             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1319         else:
1320             reason = check_filter()
1321             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1322         if reason is not None:
1323             if not silent:
1324                 self.to_screen('[download] ' + reason)
1325             if self.params.get(break_opt, False):
1326                 raise break_err()
1327         return reason
1328
1329     @staticmethod
1330     def add_extra_info(info_dict, extra_info):
1331         '''Set the keys from extra_info in info dict if they are missing'''
1332         for key, value in extra_info.items():
1333             info_dict.setdefault(key, value)
1334
1335     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1336                      process=True, force_generic_extractor=False):
1337         """
1338         Return a list with a dictionary for each video extracted.
1339
1340         Arguments:
1341         url -- URL to extract
1342
1343         Keyword arguments:
1344         download -- whether to download videos during extraction
1345         ie_key -- extractor key hint
1346         extra_info -- dictionary containing the extra values to add to each result
1347         process -- whether to resolve all unresolved references (URLs, playlist items),
1348             must be True for download to work.
1349         force_generic_extractor -- force using the generic extractor
1350         """
1351
1352         if extra_info is None:
1353             extra_info = {}
1354
1355         if not ie_key and force_generic_extractor:
1356             ie_key = 'Generic'
1357
1358         if ie_key:
1359             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1360         else:
1361             ies = self._ies
1362
1363         for ie_key, ie in ies.items():
1364             if not ie.suitable(url):
1365                 continue
1366
1367             if not ie.working():
1368                 self.report_warning('The program functionality for this site has been marked as broken, '
1369                                     'and will probably not work.')
1370
1371             temp_id = ie.get_temp_id(url)
1372             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1373                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1374                 if self.params.get('break_on_existing', False):
1375                     raise ExistingVideoReached()
1376                 break
1377             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1378         else:
1379             self.report_error('no suitable InfoExtractor for URL %s' % url)
1380
1381     def __handle_extraction_exceptions(func):
1382         @functools.wraps(func)
1383         def wrapper(self, *args, **kwargs):
1384             while True:
1385                 try:
1386                     return func(self, *args, **kwargs)
1387                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1388                     raise
1389                 except ReExtractInfo as e:
1390                     if e.expected:
1391                         self.to_screen(f'{e}; Re-extracting data')
1392                     else:
1393                         self.to_stderr('\r')
1394                         self.report_warning(f'{e}; Re-extracting data')
1395                     continue
1396                 except GeoRestrictedError as e:
1397                     msg = e.msg
1398                     if e.countries:
1399                         msg += '\nThis video is available in %s.' % ', '.join(
1400                             map(ISO3166Utils.short2full, e.countries))
1401                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1402                     self.report_error(msg)
1403                 except ExtractorError as e:  # An error we somewhat expected
1404                     self.report_error(str(e), e.format_traceback())
1405                 except Exception as e:
1406                     if self.params.get('ignoreerrors'):
1407                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1408                     else:
1409                         raise
1410                 break
1411         return wrapper
1412
1413     def _wait_for_video(self, ie_result):
1414         if (not self.params.get('wait_for_video')
1415                 or ie_result.get('_type', 'video') != 'video'
1416                 or ie_result.get('formats') or ie_result.get('url')):
1417             return
1418
1419         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1420         last_msg = ''
1421
1422         def progress(msg):
1423             nonlocal last_msg
1424             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1425             last_msg = msg
1426
1427         min_wait, max_wait = self.params.get('wait_for_video')
1428         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1429         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1430             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1431             self.report_warning('Release time of video is not known')
1432         elif (diff or 0) <= 0:
1433             self.report_warning('Video should already be available according to extracted info')
1434         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1435         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1436
1437         wait_till = time.time() + diff
1438         try:
1439             while True:
1440                 diff = wait_till - time.time()
1441                 if diff <= 0:
1442                     progress('')
1443                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1444                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1445                 time.sleep(1)
1446         except KeyboardInterrupt:
1447             progress('')
1448             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1449         except BaseException as e:
1450             if not isinstance(e, ReExtractInfo):
1451                 self.to_screen('')
1452             raise
1453
1454     @__handle_extraction_exceptions
1455     def __extract_info(self, url, ie, download, extra_info, process):
1456         ie_result = ie.extract(url)
1457         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1458             return
1459         if isinstance(ie_result, list):
1460             # Backwards compatibility: old IE result format
1461             ie_result = {
1462                 '_type': 'compat_list',
1463                 'entries': ie_result,
1464             }
1465         if extra_info.get('original_url'):
1466             ie_result.setdefault('original_url', extra_info['original_url'])
1467         self.add_default_extra_info(ie_result, ie, url)
1468         if process:
1469             self._wait_for_video(ie_result)
1470             return self.process_ie_result(ie_result, download, extra_info)
1471         else:
1472             return ie_result
1473
1474     def add_default_extra_info(self, ie_result, ie, url):
1475         if url is not None:
1476             self.add_extra_info(ie_result, {
1477                 'webpage_url': url,
1478                 'original_url': url,
1479             })
1480         webpage_url = ie_result.get('webpage_url')
1481         if webpage_url:
1482             self.add_extra_info(ie_result, {
1483                 'webpage_url_basename': url_basename(webpage_url),
1484                 'webpage_url_domain': get_domain(webpage_url),
1485             })
1486         if ie is not None:
1487             self.add_extra_info(ie_result, {
1488                 'extractor': ie.IE_NAME,
1489                 'extractor_key': ie.ie_key(),
1490             })
1491
1492     def process_ie_result(self, ie_result, download=True, extra_info=None):
1493         """
1494         Take the result of the ie(may be modified) and resolve all unresolved
1495         references (URLs, playlist items).
1496
1497         It will also download the videos if 'download'.
1498         Returns the resolved ie_result.
1499         """
1500         if extra_info is None:
1501             extra_info = {}
1502         result_type = ie_result.get('_type', 'video')
1503
1504         if result_type in ('url', 'url_transparent'):
1505             ie_result['url'] = sanitize_url(ie_result['url'])
1506             if ie_result.get('original_url'):
1507                 extra_info.setdefault('original_url', ie_result['original_url'])
1508
1509             extract_flat = self.params.get('extract_flat', False)
1510             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1511                     or extract_flat is True):
1512                 info_copy = ie_result.copy()
1513                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1514                 if ie and not ie_result.get('id'):
1515                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1516                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1517                 self.add_extra_info(info_copy, extra_info)
1518                 info_copy, _ = self.pre_process(info_copy)
1519                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1520                 if self.params.get('force_write_download_archive', False):
1521                     self.record_download_archive(info_copy)
1522                 return ie_result
1523
1524         if result_type == 'video':
1525             self.add_extra_info(ie_result, extra_info)
1526             ie_result = self.process_video_result(ie_result, download=download)
1527             additional_urls = (ie_result or {}).get('additional_urls')
1528             if additional_urls:
1529                 # TODO: Improve MetadataParserPP to allow setting a list
1530                 if isinstance(additional_urls, compat_str):
1531                     additional_urls = [additional_urls]
1532                 self.to_screen(
1533                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1534                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1535                 ie_result['additional_entries'] = [
1536                     self.extract_info(
1537                         url, download, extra_info=extra_info,
1538                         force_generic_extractor=self.params.get('force_generic_extractor'))
1539                     for url in additional_urls
1540                 ]
1541             return ie_result
1542         elif result_type == 'url':
1543             # We have to add extra_info to the results because it may be
1544             # contained in a playlist
1545             return self.extract_info(
1546                 ie_result['url'], download,
1547                 ie_key=ie_result.get('ie_key'),
1548                 extra_info=extra_info)
1549         elif result_type == 'url_transparent':
1550             # Use the information from the embedding page
1551             info = self.extract_info(
1552                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1553                 extra_info=extra_info, download=False, process=False)
1554
1555             # extract_info may return None when ignoreerrors is enabled and
1556             # extraction failed with an error, don't crash and return early
1557             # in this case
1558             if not info:
1559                 return info
1560
1561             force_properties = dict(
1562                 (k, v) for k, v in ie_result.items() if v is not None)
1563             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1564                 if f in force_properties:
1565                     del force_properties[f]
1566             new_result = info.copy()
1567             new_result.update(force_properties)
1568
1569             # Extracted info may not be a video result (i.e.
1570             # info.get('_type', 'video') != video) but rather an url or
1571             # url_transparent. In such cases outer metadata (from ie_result)
1572             # should be propagated to inner one (info). For this to happen
1573             # _type of info should be overridden with url_transparent. This
1574             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1575             if new_result.get('_type') == 'url':
1576                 new_result['_type'] = 'url_transparent'
1577
1578             return self.process_ie_result(
1579                 new_result, download=download, extra_info=extra_info)
1580         elif result_type in ('playlist', 'multi_video'):
1581             # Protect from infinite recursion due to recursively nested playlists
1582             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1583             webpage_url = ie_result['webpage_url']
1584             if webpage_url in self._playlist_urls:
1585                 self.to_screen(
1586                     '[download] Skipping already downloaded playlist: %s'
1587                     % ie_result.get('title') or ie_result.get('id'))
1588                 return
1589
1590             self._playlist_level += 1
1591             self._playlist_urls.add(webpage_url)
1592             self._fill_common_fields(ie_result, False)
1593             self._sanitize_thumbnails(ie_result)
1594             try:
1595                 return self.__process_playlist(ie_result, download)
1596             finally:
1597                 self._playlist_level -= 1
1598                 if not self._playlist_level:
1599                     self._playlist_urls.clear()
1600         elif result_type == 'compat_list':
1601             self.report_warning(
1602                 'Extractor %s returned a compat_list result. '
1603                 'It needs to be updated.' % ie_result.get('extractor'))
1604
1605             def _fixup(r):
1606                 self.add_extra_info(r, {
1607                     'extractor': ie_result['extractor'],
1608                     'webpage_url': ie_result['webpage_url'],
1609                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1610                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1611                     'extractor_key': ie_result['extractor_key'],
1612                 })
1613                 return r
1614             ie_result['entries'] = [
1615                 self.process_ie_result(_fixup(r), download, extra_info)
1616                 for r in ie_result['entries']
1617             ]
1618             return ie_result
1619         else:
1620             raise Exception('Invalid result type: %s' % result_type)
1621
1622     def _ensure_dir_exists(self, path):
1623         return make_dir(path, self.report_error)
1624
1625     @staticmethod
1626     def _playlist_infodict(ie_result, **kwargs):
1627         return {
1628             **ie_result,
1629             'playlist': ie_result.get('title') or ie_result.get('id'),
1630             'playlist_id': ie_result.get('id'),
1631             'playlist_title': ie_result.get('title'),
1632             'playlist_uploader': ie_result.get('uploader'),
1633             'playlist_uploader_id': ie_result.get('uploader_id'),
1634             'playlist_index': 0,
1635             **kwargs,
1636         }
1637
    def __process_playlist(self, ie_result, download):
        """Select, filter and process the entries of a playlist result.

        Arguments:
        ie_result -- playlist result dictionary; it is modified in place
            ('entries', 'requested_entries' and possibly 'playlist_count'
            are rewritten)
        download -- passed on to the processing of every entry

        Returns the playlist result after the 'playlist' postprocessors have
        run. Returns None when writing the playlist infojson or description
        returned None.

        Raises EntryNotInPlaylist when ie_result has no 'entries', or when
        'requested_entries' is set and a wanted entry is not among them.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Placeholder for positions that are absent from an incomplete entry list
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            # 'entries' only holds the items listed in 'requested_entries' (1-based
            # indices); expand it so that each entry sits at its own index
            def fill_missing_entries(entries, indices):
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            # Parse a comma-separated list of indices and inclusive "start-end" ranges
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            playlist_count = len(ie_entries)
            # The remaining %d is filled in with the number of selected entries later
            msg = f'Collected {playlist_count} videos; downloading %d of them'
            ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count

            def get_entry(i):
                return ie_entries[i - 1]
        else:
            msg = 'Downloading %d videos'
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)
            elif isinstance(ie_entries, InAdvancePagedList):
                if ie_entries._pagesize == 1:
                    playlist_count = ie_entries._pagecount

            # Accessing a lazy entry can run extraction code, so the lookup goes
            # through the same exception handling as the extraction methods
            def get_entry(i):
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        # First pass: collect the requested entries (indices are 1-based)
        entries, broken = [], False
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    break
            # With explicit playlist items, an entry that was not found is kept
            # as None so that positions still line up with playlistitems below
            entries.append(entry)
            try:
                if entry is not None:
                    # Called silently; here it only serves to stop collecting
                    # as soon as one of the break conditions is raised
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                broken = True
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
            ie_result['playlist_count'] = n_entries

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        # Without the option (or with a falsy value) the playlist is never abandoned
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        # Second pass: process the selected entries one by one
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            if 'playlist-index' in self.params.get('compat_opts', []):
                # Compat behaviour: derive the index from the position after re-ordering
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_count': ie_result.get('playlist_count'),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Not silent this time: the reason for skipping an entry is printed
            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # A falsy result is counted as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {playlist}')
        return ie_result
1813
1814     @__handle_extraction_exceptions
1815     def __process_iterable_entry(self, entry, download, extra_info):
1816         return self.process_ie_result(
1817             entry, download=download, extra_info=extra_info)
1818
1819     def _build_format_filter(self, filter_spec):
1820         " Returns a function to filter the formats according to the filter_spec "
1821
1822         OPERATORS = {
1823             '<': operator.lt,
1824             '<=': operator.le,
1825             '>': operator.gt,
1826             '>=': operator.ge,
1827             '=': operator.eq,
1828             '!=': operator.ne,
1829         }
1830         operator_rex = re.compile(r'''(?x)\s*
1831             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1832             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1833             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1834             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1835         m = operator_rex.fullmatch(filter_spec)
1836         if m:
1837             try:
1838                 comparison_value = int(m.group('value'))
1839             except ValueError:
1840                 comparison_value = parse_filesize(m.group('value'))
1841                 if comparison_value is None:
1842                     comparison_value = parse_filesize(m.group('value') + 'B')
1843                 if comparison_value is None:
1844                     raise ValueError(
1845                         'Invalid value %r in format specification %r' % (
1846                             m.group('value'), filter_spec))
1847             op = OPERATORS[m.group('op')]
1848
1849         if not m:
1850             STR_OPERATORS = {
1851                 '=': operator.eq,
1852                 '^=': lambda attr, value: attr.startswith(value),
1853                 '$=': lambda attr, value: attr.endswith(value),
1854                 '*=': lambda attr, value: value in attr,
1855                 '~=': lambda attr, value: value.search(attr) is not None
1856             }
1857             str_operator_rex = re.compile(r'''(?x)\s*
1858                 (?P<key>[a-zA-Z0-9._-]+)\s*
1859                 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1860                 (?P<quote>["'])?
1861                 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1862                 (?(quote)(?P=quote))\s*
1863                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1864             m = str_operator_rex.fullmatch(filter_spec)
1865             if m:
1866                 if m.group('op') == '~=':
1867                     comparison_value = re.compile(m.group('value'))
1868                 else:
1869                     comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1870                 str_op = STR_OPERATORS[m.group('op')]
1871                 if m.group('negation'):
1872                     op = lambda attr, value: not str_op(attr, value)
1873                 else:
1874                     op = str_op
1875
1876         if not m:
1877             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1878
1879         def _filter(f):
1880             actual_value = f.get(m.group('key'))
1881             if actual_value is None:
1882                 return m.group('none_inclusive')
1883             return op(actual_value, comparison_value)
1884         return _filter
1885
1886     def _check_formats(self, formats):
1887         for f in formats:
1888             self.to_screen('[info] Testing format %s' % f['format_id'])
1889             path = self.get_output_path('temp')
1890             if not self._ensure_dir_exists(f'{path}/'):
1891                 continue
1892             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1893             temp_file.close()
1894             try:
1895                 success, _ = self.dl(temp_file.name, f, test=True)
1896             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1897                 success = False
1898             finally:
1899                 if os.path.exists(temp_file.name):
1900                     try:
1901                         os.remove(temp_file.name)
1902                     except OSError:
1903                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1904             if success:
1905                 yield f
1906             else:
1907                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1908
1909     def _default_format_spec(self, info_dict, download=True):
1910
1911         def can_merge():
1912             merger = FFmpegMergerPP(self)
1913             return merger.available and merger.can_merge()
1914
1915         prefer_best = (
1916             not self.params.get('simulate')
1917             and download
1918             and (
1919                 not can_merge()
1920                 or info_dict.get('is_live', False)
1921                 or self.outtmpl_dict['default'] == '-'))
1922         compat = (
1923             prefer_best
1924             or self.params.get('allow_multiple_audio_streams', False)
1925             or 'format-spec' in self.params.get('compat_opts', []))
1926
1927         return (
1928             'best/bestvideo+bestaudio' if prefer_best
1929             else 'bestvideo*+bestaudio/best' if not compat
1930             else 'bestvideo+bestaudio/best')
1931
1932     def build_format_selector(self, format_spec):
1933         def syntax_error(note, start):
1934             message = (
1935                 'Invalid format specification: '
1936                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1937             return SyntaxError(message)
1938
1939         PICKFIRST = 'PICKFIRST'
1940         MERGE = 'MERGE'
1941         SINGLE = 'SINGLE'
1942         GROUP = 'GROUP'
1943         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1944
1945         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1946                                   'video': self.params.get('allow_multiple_video_streams', False)}
1947
1948         check_formats = self.params.get('check_formats') == 'selected'
1949
1950         def _parse_filter(tokens):
1951             filter_parts = []
1952             for type, string, start, _, _ in tokens:
1953                 if type == tokenize.OP and string == ']':
1954                     return ''.join(filter_parts)
1955                 else:
1956                     filter_parts.append(string)
1957
1958         def _remove_unused_ops(tokens):
1959             # Remove operators that we don't use and join them with the surrounding strings
1960             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1961             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1962             last_string, last_start, last_end, last_line = None, None, None, None
1963             for type, string, start, end, line in tokens:
1964                 if type == tokenize.OP and string == '[':
1965                     if last_string:
1966                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1967                         last_string = None
1968                     yield type, string, start, end, line
1969                     # everything inside brackets will be handled by _parse_filter
1970                     for type, string, start, end, line in tokens:
1971                         yield type, string, start, end, line
1972                         if type == tokenize.OP and string == ']':
1973                             break
1974                 elif type == tokenize.OP and string in ALLOWED_OPS:
1975                     if last_string:
1976                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1977                         last_string = None
1978                     yield type, string, start, end, line
1979                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1980                     if not last_string:
1981                         last_string = string
1982                         last_start = start
1983                         last_end = end
1984                     else:
1985                         last_string += string
1986             if last_string:
1987                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1988
1989         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1990             selectors = []
1991             current_selector = None
1992             for type, string, start, _, _ in tokens:
1993                 # ENCODING is only defined in python 3.x
1994                 if type == getattr(tokenize, 'ENCODING', None):
1995                     continue
1996                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1997                     current_selector = FormatSelector(SINGLE, string, [])
1998                 elif type == tokenize.OP:
1999                     if string == ')':
2000                         if not inside_group:
2001                             # ')' will be handled by the parentheses group
2002                             tokens.restore_last_token()
2003                         break
2004                     elif inside_merge and string in ['/', ',']:
2005                         tokens.restore_last_token()
2006                         break
2007                     elif inside_choice and string == ',':
2008                         tokens.restore_last_token()
2009                         break
2010                     elif string == ',':
2011                         if not current_selector:
2012                             raise syntax_error('"," must follow a format selector', start)
2013                         selectors.append(current_selector)
2014                         current_selector = None
2015                     elif string == '/':
2016                         if not current_selector:
2017                             raise syntax_error('"/" must follow a format selector', start)
2018                         first_choice = current_selector
2019                         second_choice = _parse_format_selection(tokens, inside_choice=True)
2020                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2021                     elif string == '[':
2022                         if not current_selector:
2023                             current_selector = FormatSelector(SINGLE, 'best', [])
2024                         format_filter = _parse_filter(tokens)
2025                         current_selector.filters.append(format_filter)
2026                     elif string == '(':
2027                         if current_selector:
2028                             raise syntax_error('Unexpected "("', start)
2029                         group = _parse_format_selection(tokens, inside_group=True)
2030                         current_selector = FormatSelector(GROUP, group, [])
2031                     elif string == '+':
2032                         if not current_selector:
2033                             raise syntax_error('Unexpected "+"', start)
2034                         selector_1 = current_selector
2035                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2036                         if not selector_2:
2037                             raise syntax_error('Expected a selector', start)
2038                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2039                     else:
2040                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2041                 elif type == tokenize.ENDMARKER:
2042                     break
2043             if current_selector:
2044                 selectors.append(current_selector)
2045             return selectors
2046
2047         def _merge(formats_pair):
2048             format_1, format_2 = formats_pair
2049
2050             formats_info = []
2051             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2052             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2053
2054             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2055                 get_no_more = {'video': False, 'audio': False}
2056                 for (i, fmt_info) in enumerate(formats_info):
2057                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2058                         formats_info.pop(i)
2059                         continue
2060                     for aud_vid in ['audio', 'video']:
2061                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2062                             if get_no_more[aud_vid]:
2063                                 formats_info.pop(i)
2064                                 break
2065                             get_no_more[aud_vid] = True
2066
2067             if len(formats_info) == 1:
2068                 return formats_info[0]
2069
2070             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2071             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2072
2073             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2074             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2075
2076             output_ext = self.params.get('merge_output_format')
2077             if not output_ext:
2078                 if the_only_video:
2079                     output_ext = the_only_video['ext']
2080                 elif the_only_audio and not video_fmts:
2081                     output_ext = the_only_audio['ext']
2082                 else:
2083                     output_ext = 'mkv'
2084
2085             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2086
2087             new_dict = {
2088                 'requested_formats': formats_info,
2089                 'format': '+'.join(filtered('format')),
2090                 'format_id': '+'.join(filtered('format_id')),
2091                 'ext': output_ext,
2092                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2093                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2094                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2095                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2096                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2097             }
2098
2099             if the_only_video:
2100                 new_dict.update({
2101                     'width': the_only_video.get('width'),
2102                     'height': the_only_video.get('height'),
2103                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2104                     'fps': the_only_video.get('fps'),
2105                     'dynamic_range': the_only_video.get('dynamic_range'),
2106                     'vcodec': the_only_video.get('vcodec'),
2107                     'vbr': the_only_video.get('vbr'),
2108                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2109                 })
2110
2111             if the_only_audio:
2112                 new_dict.update({
2113                     'acodec': the_only_audio.get('acodec'),
2114                     'abr': the_only_audio.get('abr'),
2115                     'asr': the_only_audio.get('asr'),
2116                 })
2117
2118             return new_dict
2119
2120         def _check_formats(formats):
2121             if not check_formats:
2122                 yield from formats
2123                 return
2124             yield from self._check_formats(formats)
2125
2126         def _build_selector_function(selector):
2127             if isinstance(selector, list):  # ,
2128                 fs = [_build_selector_function(s) for s in selector]
2129
2130                 def selector_function(ctx):
2131                     for f in fs:
2132                         yield from f(ctx)
2133                 return selector_function
2134
2135             elif selector.type == GROUP:  # ()
2136                 selector_function = _build_selector_function(selector.selector)
2137
2138             elif selector.type == PICKFIRST:  # /
2139                 fs = [_build_selector_function(s) for s in selector.selector]
2140
2141                 def selector_function(ctx):
2142                     for f in fs:
2143                         picked_formats = list(f(ctx))
2144                         if picked_formats:
2145                             return picked_formats
2146                     return []
2147
2148             elif selector.type == MERGE:  # +
2149                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2150
2151                 def selector_function(ctx):
2152                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2153                         yield _merge(pair)
2154
2155             elif selector.type == SINGLE:  # atom
2156                 format_spec = selector.selector or 'best'
2157
2158                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2159                 if format_spec == 'all':
2160                     def selector_function(ctx):
2161                         yield from _check_formats(ctx['formats'][::-1])
2162                 elif format_spec == 'mergeall':
2163                     def selector_function(ctx):
2164                         formats = list(_check_formats(ctx['formats']))
2165                         if not formats:
2166                             return
2167                         merged_format = formats[-1]
2168                         for f in formats[-2::-1]:
2169                             merged_format = _merge((merged_format, f))
2170                         yield merged_format
2171
2172                 else:
2173                     format_fallback, format_reverse, format_idx = False, True, 1
2174                     mobj = re.match(
2175                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2176                         format_spec)
2177                     if mobj is not None:
2178                         format_idx = int_or_none(mobj.group('n'), default=1)
2179                         format_reverse = mobj.group('bw')[0] == 'b'
2180                         format_type = (mobj.group('type') or [None])[0]
2181                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2182                         format_modified = mobj.group('mod') is not None
2183
2184                         format_fallback = not format_type and not format_modified  # for b, w
2185                         _filter_f = (
2186                             (lambda f: f.get('%scodec' % format_type) != 'none')
2187                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2188                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2189                             if format_type  # bv, ba, wv, wa
2190                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2191                             if not format_modified  # b, w
2192                             else lambda f: True)  # b*, w*
2193                         filter_f = lambda f: _filter_f(f) and (
2194                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2195                     else:
2196                         if format_spec in self._format_selection_exts['audio']:
2197                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2198                         elif format_spec in self._format_selection_exts['video']:
2199                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2200                         elif format_spec in self._format_selection_exts['storyboards']:
2201                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2202                         else:
2203                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2204
2205                     def selector_function(ctx):
2206                         formats = list(ctx['formats'])
2207                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2208                         if format_fallback and ctx['incomplete_formats'] and not matches:
2209                             # for extractors with incomplete formats (audio only (soundcloud)
2210                             # or video only (imgur)) best/worst will fallback to
2211                             # best/worst {video,audio}-only format
2212                             matches = formats
2213                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2214                         try:
2215                             yield matches[format_idx - 1]
2216                         except IndexError:
2217                             return
2218
2219             filters = [self._build_format_filter(f) for f in selector.filters]
2220
2221             def final_selector(ctx):
2222                 ctx_copy = dict(ctx)
2223                 for _filter in filters:
2224                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2225                 return selector_function(ctx_copy)
2226             return final_selector
2227
2228         stream = io.BytesIO(format_spec.encode('utf-8'))
2229         try:
2230             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2231         except tokenize.TokenError:
2232             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2233
2234         class TokenIterator(object):
2235             def __init__(self, tokens):
2236                 self.tokens = tokens
2237                 self.counter = 0
2238
2239             def __iter__(self):
2240                 return self
2241
2242             def __next__(self):
2243                 if self.counter >= len(self.tokens):
2244                     raise StopIteration()
2245                 value = self.tokens[self.counter]
2246                 self.counter += 1
2247                 return value
2248
2249             next = __next__
2250
2251             def restore_last_token(self):
2252                 self.counter -= 1
2253
2254         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2255         return _build_selector_function(parsed_selector)
2256
2257     def _calc_headers(self, info_dict):
2258         res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2259
2260         cookies = self._calc_cookies(info_dict)
2261         if cookies:
2262             res['Cookie'] = cookies
2263
2264         if 'X-Forwarded-For' not in res:
2265             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2266             if x_forwarded_for_ip:
2267                 res['X-Forwarded-For'] = x_forwarded_for_ip
2268
2269         return res
2270
2271     def _calc_cookies(self, info_dict):
2272         pr = sanitized_Request(info_dict['url'])
2273         self.cookiejar.add_cookie_header(pr)
2274         return pr.get_header('Cookie')
2275
2276     def _sort_thumbnails(self, thumbnails):
2277         thumbnails.sort(key=lambda t: (
2278             t.get('preference') if t.get('preference') is not None else -1,
2279             t.get('width') if t.get('width') is not None else -1,
2280             t.get('height') if t.get('height') is not None else -1,
2281             t.get('id') if t.get('id') is not None else '',
2282             t.get('url')))
2283
2284     def _sanitize_thumbnails(self, info_dict):
2285         thumbnails = info_dict.get('thumbnails')
2286         if thumbnails is None:
2287             thumbnail = info_dict.get('thumbnail')
2288             if thumbnail:
2289                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2290         if not thumbnails:
2291             return
2292
2293         def check_thumbnails(thumbnails):
2294             for t in thumbnails:
2295                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2296                 try:
2297                     self.urlopen(HEADRequest(t['url']))
2298                 except network_exceptions as err:
2299                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2300                     continue
2301                 yield t
2302
2303         self._sort_thumbnails(thumbnails)
2304         for i, t in enumerate(thumbnails):
2305             if t.get('id') is None:
2306                 t['id'] = '%d' % i
2307             if t.get('width') and t.get('height'):
2308                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2309             t['url'] = sanitize_url(t['url'])
2310
2311         if self.params.get('check_formats') is True:
2312             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2313         else:
2314             info_dict['thumbnails'] = thumbnails
2315
2316     def _fill_common_fields(self, info_dict, is_video=True):
2317         # TODO: move sanitization here
2318         if is_video:
2319             # playlists are allowed to lack "title"
2320             info_dict['fulltitle'] = info_dict.get('title')
2321             if 'title' not in info_dict:
2322                 raise ExtractorError('Missing "title" field in extractor result',
2323                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2324             elif not info_dict.get('title'):
2325                 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2326                 info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2327
2328         if info_dict.get('duration') is not None:
2329             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2330
2331         for ts_key, date_key in (
2332                 ('timestamp', 'upload_date'),
2333                 ('release_timestamp', 'release_date'),
2334                 ('modified_timestamp', 'modified_date'),
2335         ):
2336             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2337                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2338                 # see http://bugs.python.org/issue1646728)
2339                 try:
2340                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2341                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2342                 except (ValueError, OverflowError, OSError):
2343                     pass
2344
2345         live_keys = ('is_live', 'was_live')
2346         live_status = info_dict.get('live_status')
2347         if live_status is None:
2348             for key in live_keys:
2349                 if info_dict.get(key) is False:
2350                     continue
2351                 if info_dict.get(key):
2352                     live_status = key
2353                 break
2354             if all(info_dict.get(key) is False for key in live_keys):
2355                 live_status = 'not_live'
2356         if live_status:
2357             info_dict['live_status'] = live_status
2358             for key in live_keys:
2359                 if info_dict.get(key) is None:
2360                     info_dict[key] = (live_status == key)
2361
2362         # Auto generate title fields corresponding to the *_number fields when missing
2363         # in order to always have clean titles. This is very common for TV series.
2364         for field in ('chapter', 'season', 'episode'):
2365             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2366                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2367
2368     def process_video_result(self, info_dict, download=True):
2369         assert info_dict.get('_type', 'video') == 'video'
2370         self._num_videos += 1
2371
2372         if 'id' not in info_dict:
2373             raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2374         elif not info_dict.get('id'):
2375             raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2376
2377         def report_force_conversion(field, field_not, conversion):
2378             self.report_warning(
2379                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2380                 % (field, field_not, conversion))
2381
2382         def sanitize_string_field(info, string_field):
2383             field = info.get(string_field)
2384             if field is None or isinstance(field, compat_str):
2385                 return
2386             report_force_conversion(string_field, 'a string', 'string')
2387             info[string_field] = compat_str(field)
2388
2389         def sanitize_numeric_fields(info):
2390             for numeric_field in self._NUMERIC_FIELDS:
2391                 field = info.get(numeric_field)
2392                 if field is None or isinstance(field, compat_numeric_types):
2393                     continue
2394                 report_force_conversion(numeric_field, 'numeric', 'int')
2395                 info[numeric_field] = int_or_none(field)
2396
2397         sanitize_string_field(info_dict, 'id')
2398         sanitize_numeric_fields(info_dict)
2399         if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2400             self.report_warning('"duration" field is negative, there is an error in extractor')
2401
2402         if 'playlist' not in info_dict:
2403             # It isn't part of a playlist
2404             info_dict['playlist'] = None
2405             info_dict['playlist_index'] = None
2406
2407         self._sanitize_thumbnails(info_dict)
2408
2409         thumbnail = info_dict.get('thumbnail')
2410         thumbnails = info_dict.get('thumbnails')
2411         if thumbnail:
2412             info_dict['thumbnail'] = sanitize_url(thumbnail)
2413         elif thumbnails:
2414             info_dict['thumbnail'] = thumbnails[-1]['url']
2415
2416         if info_dict.get('display_id') is None and 'id' in info_dict:
2417             info_dict['display_id'] = info_dict['id']
2418
2419         self._fill_common_fields(info_dict)
2420
2421         for cc_kind in ('subtitles', 'automatic_captions'):
2422             cc = info_dict.get(cc_kind)
2423             if cc:
2424                 for _, subtitle in cc.items():
2425                     for subtitle_format in subtitle:
2426                         if subtitle_format.get('url'):
2427                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2428                         if subtitle_format.get('ext') is None:
2429                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2430
2431         automatic_captions = info_dict.get('automatic_captions')
2432         subtitles = info_dict.get('subtitles')
2433
2434         info_dict['requested_subtitles'] = self.process_subtitles(
2435             info_dict['id'], subtitles, automatic_captions)
2436
2437         if info_dict.get('formats') is None:
2438             # There's only one format available
2439             formats = [info_dict]
2440         else:
2441             formats = info_dict['formats']
2442
2443         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2444         if not self.params.get('allow_unplayable_formats'):
2445             formats = [f for f in formats if not f.get('has_drm')]
2446
2447         get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2448         if not get_from_start:
2449             info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2450         if info_dict.get('is_live') and formats:
2451             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2452             if get_from_start and not formats:
2453                 self.raise_no_formats(info_dict, msg='--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2454                                                      'If you want to download from the current time, pass --no-live-from-start')
2455
2456         if not formats:
2457             self.raise_no_formats(info_dict)
2458
2459         def is_wellformed(f):
2460             url = f.get('url')
2461             if not url:
2462                 self.report_warning(
2463                     '"url" field is missing or empty - skipping format, '
2464                     'there is an error in extractor')
2465                 return False
2466             if isinstance(url, bytes):
2467                 sanitize_string_field(f, 'url')
2468             return True
2469
2470         # Filter out malformed formats for better extraction robustness
2471         formats = list(filter(is_wellformed, formats))
2472
2473         formats_dict = {}
2474
2475         # We check that all the formats have the format and format_id fields
2476         for i, format in enumerate(formats):
2477             sanitize_string_field(format, 'format_id')
2478             sanitize_numeric_fields(format)
2479             format['url'] = sanitize_url(format['url'])
2480             if not format.get('format_id'):
2481                 format['format_id'] = compat_str(i)
2482             else:
2483                 # Sanitize format_id from characters used in format selector expression
2484                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2485             format_id = format['format_id']
2486             if format_id not in formats_dict:
2487                 formats_dict[format_id] = []
2488             formats_dict[format_id].append(format)
2489
2490         # Make sure all formats have unique format_id
2491         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2492         for format_id, ambiguous_formats in formats_dict.items():
2493             ambigious_id = len(ambiguous_formats) > 1
2494             for i, format in enumerate(ambiguous_formats):
2495                 if ambigious_id:
2496                     format['format_id'] = '%s-%d' % (format_id, i)
2497                 if format.get('ext') is None:
2498                     format['ext'] = determine_ext(format['url']).lower()
2499                 # Ensure there is no conflict between id and ext in format selection
2500                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2501                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2502                     format['format_id'] = 'f%s' % format['format_id']
2503
2504         for i, format in enumerate(formats):
2505             if format.get('format') is None:
2506                 format['format'] = '{id} - {res}{note}'.format(
2507                     id=format['format_id'],
2508                     res=self.format_resolution(format),
2509                     note=format_field(format, 'format_note', ' (%s)'),
2510                 )
2511             if format.get('protocol') is None:
2512                 format['protocol'] = determine_protocol(format)
2513             if format.get('resolution') is None:
2514                 format['resolution'] = self.format_resolution(format, default=None)
2515             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2516                 format['dynamic_range'] = 'SDR'
2517             if (info_dict.get('duration') and format.get('tbr')
2518                     and not format.get('filesize') and not format.get('filesize_approx')):
2519                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2520
2521             # Add HTTP headers, so that external programs can use them from the
2522             # json output
2523             full_format_info = info_dict.copy()
2524             full_format_info.update(format)
2525             format['http_headers'] = self._calc_headers(full_format_info)
2526         # Remove private housekeeping stuff
2527         if '__x_forwarded_for_ip' in info_dict:
2528             del info_dict['__x_forwarded_for_ip']
2529
2530         if self.params.get('check_formats') is True:
2531             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2532
2533         if not formats or formats[0] is not info_dict:
2534             # only set the 'formats' fields if the original info_dict list them
2535             # otherwise we end up with a circular reference, the first (and unique)
2536             # element in the 'formats' field in info_dict is info_dict itself,
2537             # which can't be exported to json
2538             info_dict['formats'] = formats
2539
2540         info_dict, _ = self.pre_process(info_dict)
2541
2542         if self._match_entry(info_dict) is not None:
2543             return info_dict
2544
2545         self.post_extract(info_dict)
2546         info_dict, _ = self.pre_process(info_dict, 'after_filter')
2547
2548         # The pre-processors may have modified the formats
2549         formats = info_dict.get('formats', [info_dict])
2550
2551         list_only = self.params.get('simulate') is None and (
2552             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2553         interactive_format_selection = not list_only and self.format_selector == '-'
2554         if self.params.get('list_thumbnails'):
2555             self.list_thumbnails(info_dict)
2556         if self.params.get('listsubtitles'):
2557             if 'automatic_captions' in info_dict:
2558                 self.list_subtitles(
2559                     info_dict['id'], automatic_captions, 'automatic captions')
2560             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2561         if self.params.get('listformats') or interactive_format_selection:
2562             self.list_formats(info_dict)
2563         if list_only:
2564             # Without this printing, -F --print-json will not work
2565             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2566             return
2567
2568         format_selector = self.format_selector
2569         if format_selector is None:
2570             req_format = self._default_format_spec(info_dict, download=download)
2571             self.write_debug('Default format spec: %s' % req_format)
2572             format_selector = self.build_format_selector(req_format)
2573
2574         while True:
2575             if interactive_format_selection:
2576                 req_format = input(
2577                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2578                 try:
2579                     format_selector = self.build_format_selector(req_format)
2580                 except SyntaxError as err:
2581                     self.report_error(err, tb=False, is_error=False)
2582                     continue
2583
2584             # While in format selection we may need to have an access to the original
2585             # format set in order to calculate some metrics or do some processing.
2586             # For now we need to be able to guess whether original formats provided
2587             # by extractor are incomplete or not (i.e. whether extractor provides only
2588             # video-only or audio-only formats) for proper formats selection for
2589             # extractors with such incomplete formats (see
2590             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2591             # Since formats may be filtered during format selection and may not match
2592             # the original formats the results may be incorrect. Thus original formats
2593             # or pre-calculated metrics should be passed to format selection routines
2594             # as well.
2595             # We will pass a context object containing all necessary additional data
2596             # instead of just formats.
2597             # This fixes incorrect format selection issue (see
2598             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2599             incomplete_formats = (
2600                 # All formats are video-only or
2601                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2602                 # all formats are audio-only
2603                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2604
2605             ctx = {
2606                 'formats': formats,
2607                 'incomplete_formats': incomplete_formats,
2608             }
2609
2610             formats_to_download = list(format_selector(ctx))
2611             if interactive_format_selection and not formats_to_download:
2612                 self.report_error('Requested format is not available', tb=False, is_error=False)
2613                 continue
2614             break
2615
2616         if not formats_to_download:
2617             if not self.params.get('ignore_no_formats_error'):
2618                 raise ExtractorError('Requested format is not available', expected=True,
2619                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2620             self.report_warning('Requested format is not available')
2621             # Process what we can, even without any available formats.
2622             formats_to_download = [{}]
2623
2624         best_format = formats_to_download[-1]
2625         if download:
2626             if best_format:
2627                 self.to_screen(
2628                     f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2629                     + ', '.join([f['format_id'] for f in formats_to_download]))
2630             max_downloads_reached = False
2631             for i, fmt in enumerate(formats_to_download):
2632                 formats_to_download[i] = new_info = self._copy_infodict(info_dict)
2633                 new_info.update(fmt)
2634                 try:
2635                     self.process_info(new_info)
2636                 except MaxDownloadsReached:
2637                     max_downloads_reached = True
2638                 # Remove copied info
2639                 for key, val in tuple(new_info.items()):
2640                     if info_dict.get(key) == val:
2641                         new_info.pop(key)
2642                 if max_downloads_reached:
2643                     break
2644
2645             write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2646             assert write_archive.issubset({True, False, 'ignore'})
2647             if True in write_archive and False not in write_archive:
2648                 self.record_download_archive(info_dict)
2649
2650             info_dict['requested_downloads'] = formats_to_download
2651             info_dict = self.run_all_pps('after_video', info_dict)
2652             if max_downloads_reached:
2653                 raise MaxDownloadsReached()
2654
2655         # We update the info dict with the selected best quality format (backwards compatibility)
2656         info_dict.update(best_format)
2657         return info_dict
2658
2659     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2660         """Select the requested subtitles and their format"""
2661         available_subs = {}
2662         if normal_subtitles and self.params.get('writesubtitles'):
2663             available_subs.update(normal_subtitles)
2664         if automatic_captions and self.params.get('writeautomaticsub'):
2665             for lang, cap_info in automatic_captions.items():
2666                 if lang not in available_subs:
2667                     available_subs[lang] = cap_info
2668
2669         if (not self.params.get('writesubtitles') and not
2670                 self.params.get('writeautomaticsub') or not
2671                 available_subs):
2672             return None
2673
2674         all_sub_langs = available_subs.keys()
2675         if self.params.get('allsubtitles', False):
2676             requested_langs = all_sub_langs
2677         elif self.params.get('subtitleslangs', False):
2678             # A list is used so that the order of languages will be the same as
2679             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2680             requested_langs = []
2681             for lang_re in self.params.get('subtitleslangs'):
2682                 discard = lang_re[0] == '-'
2683                 if discard:
2684                     lang_re = lang_re[1:]
2685                 if lang_re == 'all':
2686                     if discard:
2687                         requested_langs = []
2688                     else:
2689                         requested_langs.extend(all_sub_langs)
2690                     continue
2691                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2692                 if discard:
2693                     for lang in current_langs:
2694                         while lang in requested_langs:
2695                             requested_langs.remove(lang)
2696                 else:
2697                     requested_langs.extend(current_langs)
2698             requested_langs = orderedSet(requested_langs)
2699         elif 'en' in available_subs:
2700             requested_langs = ['en']
2701         else:
2702             requested_langs = [list(all_sub_langs)[0]]
2703         if requested_langs:
2704             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2705
2706         formats_query = self.params.get('subtitlesformat', 'best')
2707         formats_preference = formats_query.split('/') if formats_query else []
2708         subs = {}
2709         for lang in requested_langs:
2710             formats = available_subs.get(lang)
2711             if formats is None:
2712                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2713                 continue
2714             for ext in formats_preference:
2715                 if ext == 'best':
2716                     f = formats[-1]
2717                     break
2718                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2719                 if matches:
2720                     f = matches[-1]
2721                     break
2722             else:
2723                 f = formats[-1]
2724                 self.report_warning(
2725                     'No subtitle format found matching "%s" for language %s, '
2726                     'using %s' % (formats_query, lang, f['ext']))
2727             subs[lang] = f
2728         return subs
2729
2730     def _forceprint(self, key, info_dict):
2731         if info_dict is None:
2732             return
2733         info_copy = info_dict.copy()
2734         info_copy['formats_table'] = self.render_formats_table(info_dict)
2735         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2736         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2737         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2738
2739         def format_tmpl(tmpl):
2740             mobj = re.match(r'\w+(=?)$', tmpl)
2741             if mobj and mobj.group(1):
2742                 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2743             elif mobj:
2744                 return f'%({tmpl})s'
2745             return tmpl
2746
2747         for tmpl in self.params['forceprint'].get(key, []):
2748             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2749
2750         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2751             filename = self.evaluate_outtmpl(file_tmpl, info_dict)
2752             tmpl = format_tmpl(tmpl)
2753             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2754             if self._ensure_dir_exists(filename):
2755                 with io.open(filename, 'a', encoding='utf-8') as f:
2756                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2757
2758     def __forced_printings(self, info_dict, filename, incomplete):
2759         def print_mandatory(field, actual_field=None):
2760             if actual_field is None:
2761                 actual_field = field
2762             if (self.params.get('force%s' % field, False)
2763                     and (not incomplete or info_dict.get(actual_field) is not None)):
2764                 self.to_stdout(info_dict[actual_field])
2765
2766         def print_optional(field):
2767             if (self.params.get('force%s' % field, False)
2768                     and info_dict.get(field) is not None):
2769                 self.to_stdout(info_dict[field])
2770
2771         info_dict = info_dict.copy()
2772         if filename is not None:
2773             info_dict['filename'] = filename
2774         if info_dict.get('requested_formats') is not None:
2775             # For RTMP URLs, also include the playpath
2776             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2777         elif 'url' in info_dict:
2778             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2779
2780         if (self.params.get('forcejson')
2781                 or self.params['forceprint'].get('video')
2782                 or self.params['print_to_file'].get('video')):
2783             self.post_extract(info_dict)
2784         self._forceprint('video', info_dict)
2785
2786         print_mandatory('title')
2787         print_mandatory('id')
2788         print_mandatory('url', 'urls')
2789         print_optional('thumbnail')
2790         print_optional('description')
2791         print_optional('filename')
2792         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2793             self.to_stdout(formatSeconds(info_dict['duration']))
2794         print_mandatory('format')
2795
2796         if self.params.get('forcejson'):
2797             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2798
2799     def dl(self, name, info, subtitle=False, test=False):
2800         if not info.get('url'):
2801             self.raise_no_formats(info, True)
2802
2803         if test:
2804             verbose = self.params.get('verbose')
2805             params = {
2806                 'test': True,
2807                 'quiet': self.params.get('quiet') or not verbose,
2808                 'verbose': verbose,
2809                 'noprogress': not verbose,
2810                 'nopart': True,
2811                 'skip_unavailable_fragments': False,
2812                 'keep_fragments': False,
2813                 'overwrites': True,
2814                 '_no_ytdl_file': True,
2815             }
2816         else:
2817             params = self.params
2818         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2819         if not test:
2820             for ph in self._progress_hooks:
2821                 fd.add_progress_hook(ph)
2822             urls = '", "'.join(
2823                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2824                 for f in info.get('requested_formats', []) or [info])
2825             self.write_debug('Invoking downloader on "%s"' % urls)
2826
2827         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2828         # But it may contain objects that are not deep-copyable
2829         new_info = self._copy_infodict(info)
2830         if new_info.get('http_headers') is None:
2831             new_info['http_headers'] = self._calc_headers(new_info)
2832         return fd.download(name, new_info, subtitle)
2833
2834     def existing_file(self, filepaths, *, default_overwrite=True):
2835         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2836         if existing_files and not self.params.get('overwrites', default_overwrite):
2837             return existing_files[0]
2838
2839         for file in existing_files:
2840             self.report_file_delete(file)
2841             os.remove(file)
2842         return None
2843
2844     def process_info(self, info_dict):
2845         """Process a single resolved IE result. (Modifies it in-place)"""
2846
2847         assert info_dict.get('_type', 'video') == 'video'
2848         original_infodict = info_dict
2849
2850         if 'format' not in info_dict and 'ext' in info_dict:
2851             info_dict['format'] = info_dict['ext']
2852
2853         # This is mostly just for backward compatibility of process_info
2854         # As a side-effect, this allows for format-specific filters
2855         if self._match_entry(info_dict) is not None:
2856             info_dict['__write_download_archive'] = 'ignore'
2857             return
2858
2859         # Does nothing under normal operation - for backward compatibility of process_info
2860         self.post_extract(info_dict)
2861
2862         # info_dict['_filename'] needs to be set for backward compatibility
2863         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2864         temp_filename = self.prepare_filename(info_dict, 'temp')
2865         files_to_move = {}
2866
2867         self._num_downloads += 1
2868
2869         # Forced printings
2870         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2871
2872         if self.params.get('simulate'):
2873             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2874             return
2875
2876         if full_filename is None:
2877             return
2878         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2879             return
2880         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2881             return
2882
2883         if self._write_description('video', info_dict,
2884                                    self.prepare_filename(info_dict, 'description')) is None:
2885             return
2886
2887         sub_files = self._write_subtitles(info_dict, temp_filename)
2888         if sub_files is None:
2889             return
2890         files_to_move.update(dict(sub_files))
2891
2892         thumb_files = self._write_thumbnails(
2893             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2894         if thumb_files is None:
2895             return
2896         files_to_move.update(dict(thumb_files))
2897
2898         infofn = self.prepare_filename(info_dict, 'infojson')
2899         _infojson_written = self._write_info_json('video', info_dict, infofn)
2900         if _infojson_written:
2901             info_dict['infojson_filename'] = infofn
2902             # For backward compatibility, even though it was a private field
2903             info_dict['__infojson_filename'] = infofn
2904         elif _infojson_written is None:
2905             return
2906
2907         # Note: Annotations are deprecated
2908         annofn = None
2909         if self.params.get('writeannotations', False):
2910             annofn = self.prepare_filename(info_dict, 'annotation')
2911         if annofn:
2912             if not self._ensure_dir_exists(encodeFilename(annofn)):
2913                 return
2914             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2915                 self.to_screen('[info] Video annotations are already present')
2916             elif not info_dict.get('annotations'):
2917                 self.report_warning('There are no annotations to write.')
2918             else:
2919                 try:
2920                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2921                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2922                         annofile.write(info_dict['annotations'])
2923                 except (KeyError, TypeError):
2924                     self.report_warning('There are no annotations to write.')
2925                 except (OSError, IOError):
2926                     self.report_error('Cannot write annotations file: ' + annofn)
2927                     return
2928
2929         # Write internet shortcut files
2930         def _write_link_file(link_type):
2931             url = try_get(info_dict['webpage_url'], iri_to_uri)
2932             if not url:
2933                 self.report_warning(
2934                     f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2935                 return True
2936             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2937             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2938                 return False
2939             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2940                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2941                 return True
2942             try:
2943                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2944                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2945                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2946                     template_vars = {'url': url}
2947                     if link_type == 'desktop':
2948                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2949                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2950             except (OSError, IOError):
2951                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2952                 return False
2953             return True
2954
2955         write_links = {
2956             'url': self.params.get('writeurllink'),
2957             'webloc': self.params.get('writewebloclink'),
2958             'desktop': self.params.get('writedesktoplink'),
2959         }
2960         if self.params.get('writelink'):
2961             link_type = ('webloc' if sys.platform == 'darwin'
2962                          else 'desktop' if sys.platform.startswith('linux')
2963                          else 'url')
2964             write_links[link_type] = True
2965
2966         if any(should_write and not _write_link_file(link_type)
2967                for link_type, should_write in write_links.items()):
2968             return
2969
2970         def replace_info_dict(new_info):
2971             nonlocal info_dict
2972             if new_info == info_dict:
2973                 return
2974             info_dict.clear()
2975             info_dict.update(new_info)
2976
2977         try:
2978             new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2979             replace_info_dict(new_info)
2980         except PostProcessingError as err:
2981             self.report_error('Preprocessing: %s' % str(err))
2982             return
2983
2984         if self.params.get('skip_download'):
2985             info_dict['filepath'] = temp_filename
2986             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2987             info_dict['__files_to_move'] = files_to_move
2988             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2989             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2990         else:
2991             # Download
2992             info_dict.setdefault('__postprocessors', [])
2993             try:
2994
2995                 def existing_video_file(*filepaths):
2996                     ext = info_dict.get('ext')
2997                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
2998                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
2999                                               default_overwrite=False)
3000                     if file:
3001                         info_dict['ext'] = os.path.splitext(file)[1][1:]
3002                     return file
3003
3004                 success = True
3005                 if info_dict.get('requested_formats') is not None:
3006
3007                     def compatible_formats(formats):
3008                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3009                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
3010                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
3011                         if len(video_formats) > 2 or len(audio_formats) > 2:
3012                             return False
3013
3014                         # Check extension
3015                         exts = set(format.get('ext') for format in formats)
3016                         COMPATIBLE_EXTS = (
3017                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
3018                             set(('webm',)),
3019                         )
3020                         for ext_sets in COMPATIBLE_EXTS:
3021                             if ext_sets.issuperset(exts):
3022                                 return True
3023                         # TODO: Check acodec/vcodec
3024                         return False
3025
3026                     requested_formats = info_dict['requested_formats']
3027                     old_ext = info_dict['ext']
3028                     if self.params.get('merge_output_format') is None:
3029                         if not compatible_formats(requested_formats):
3030                             info_dict['ext'] = 'mkv'
3031                             self.report_warning(
3032                                 'Requested formats are incompatible for merge and will be merged into mkv')
3033                         if (info_dict['ext'] == 'webm'
3034                                 and info_dict.get('thumbnails')
3035                                 # check with type instead of pp_key, __name__, or isinstance
3036                                 # since we dont want any custom PPs to trigger this
3037                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3038                             info_dict['ext'] = 'mkv'
3039                             self.report_warning(
3040                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3041                     new_ext = info_dict['ext']
3042
3043                     def correct_ext(filename, ext=new_ext):
3044                         if filename == '-':
3045                             return filename
3046                         filename_real_ext = os.path.splitext(filename)[1][1:]
3047                         filename_wo_ext = (
3048                             os.path.splitext(filename)[0]
3049                             if filename_real_ext in (old_ext, new_ext)
3050                             else filename)
3051                         return '%s.%s' % (filename_wo_ext, ext)
3052
3053                     # Ensure filename always has a correct extension for successful merge
3054                     full_filename = correct_ext(full_filename)
3055                     temp_filename = correct_ext(temp_filename)
3056                     dl_filename = existing_video_file(full_filename, temp_filename)
3057                     info_dict['__real_download'] = False
3058
3059                     downloaded = []
3060                     merger = FFmpegMergerPP(self)
3061
3062                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3063                     if dl_filename is not None:
3064                         self.report_file_already_downloaded(dl_filename)
3065                     elif fd:
3066                         for f in requested_formats if fd != FFmpegFD else []:
3067                             f['filepath'] = fname = prepend_extension(
3068                                 correct_ext(temp_filename, info_dict['ext']),
3069                                 'f%s' % f['format_id'], info_dict['ext'])
3070                             downloaded.append(fname)
3071                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3072                         success, real_download = self.dl(temp_filename, info_dict)
3073                         info_dict['__real_download'] = real_download
3074                     else:
3075                         if self.params.get('allow_unplayable_formats'):
3076                             self.report_warning(
3077                                 'You have requested merging of multiple formats '
3078                                 'while also allowing unplayable formats to be downloaded. '
3079                                 'The formats won\'t be merged to prevent data corruption.')
3080                         elif not merger.available:
3081                             msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3082                             if not self.params.get('ignoreerrors'):
3083                                 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3084                                 return
3085                             self.report_warning(f'{msg}. The formats won\'t be merged')
3086
3087                         if temp_filename == '-':
3088                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3089                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3090                                       else 'but ffmpeg is not installed')
3091                             self.report_warning(
3092                                 f'You have requested downloading multiple formats to stdout {reason}. '
3093                                 'The formats will be streamed one after the other')
3094                             fname = temp_filename
3095                         for f in requested_formats:
3096                             new_info = dict(info_dict)
3097                             del new_info['requested_formats']
3098                             new_info.update(f)
3099                             if temp_filename != '-':
3100                                 fname = prepend_extension(
3101                                     correct_ext(temp_filename, new_info['ext']),
3102                                     'f%s' % f['format_id'], new_info['ext'])
3103                                 if not self._ensure_dir_exists(fname):
3104                                     return
3105                                 f['filepath'] = fname
3106                                 downloaded.append(fname)
3107                             partial_success, real_download = self.dl(fname, new_info)
3108                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3109                             success = success and partial_success
3110
3111                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3112                         info_dict['__postprocessors'].append(merger)
3113                         info_dict['__files_to_merge'] = downloaded
3114                         # Even if there were no downloads, it is being merged only now
3115                         info_dict['__real_download'] = True
3116                     else:
3117                         for file in downloaded:
3118                             files_to_move[file] = None
3119                 else:
3120                     # Just a single file
3121                     dl_filename = existing_video_file(full_filename, temp_filename)
3122                     if dl_filename is None or dl_filename == temp_filename:
3123                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3124                         # So we should try to resume the download
3125                         success, real_download = self.dl(temp_filename, info_dict)
3126                         info_dict['__real_download'] = real_download
3127                     else:
3128                         self.report_file_already_downloaded(dl_filename)
3129
3130                 dl_filename = dl_filename or temp_filename
3131                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3132
3133             except network_exceptions as err:
3134                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3135                 return
3136             except (OSError, IOError) as err:
3137                 raise UnavailableVideoError(err)
3138             except (ContentTooShortError, ) as err:
3139                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3140                 return
3141
3142             if success and full_filename != '-':
3143
3144                 def fixup():
3145                     do_fixup = True
3146                     fixup_policy = self.params.get('fixup')
3147                     vid = info_dict['id']
3148
3149                     if fixup_policy in ('ignore', 'never'):
3150                         return
3151                     elif fixup_policy == 'warn':
3152                         do_fixup = False
3153                     elif fixup_policy != 'force':
3154                         assert fixup_policy in ('detect_or_warn', None)
3155                         if not info_dict.get('__real_download'):
3156                             do_fixup = False
3157
3158                     def ffmpeg_fixup(cndn, msg, cls):
3159                         if not cndn:
3160                             return
3161                         if not do_fixup:
3162                             self.report_warning(f'{vid}: {msg}')
3163                             return
3164                         pp = cls(self)
3165                         if pp.available:
3166                             info_dict['__postprocessors'].append(pp)
3167                         else:
3168                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3169
3170                     stretched_ratio = info_dict.get('stretched_ratio')
3171                     ffmpeg_fixup(
3172                         stretched_ratio not in (1, None),
3173                         f'Non-uniform pixel ratio {stretched_ratio}',
3174                         FFmpegFixupStretchedPP)
3175
3176                     ffmpeg_fixup(
3177                         (info_dict.get('requested_formats') is None
3178                          and info_dict.get('container') == 'm4a_dash'
3179                          and info_dict.get('ext') == 'm4a'),
3180                         'writing DASH m4a. Only some players support this container',
3181                         FFmpegFixupM4aPP)
3182
3183                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3184                     downloader = downloader.__name__ if downloader else None
3185
3186                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3187                         ffmpeg_fixup(downloader == 'HlsFD',
3188                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3189                                      FFmpegFixupM3u8PP)
3190                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3191                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3192
3193                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3194                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3195
3196                 fixup()
3197                 try:
3198                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3199                 except PostProcessingError as err:
3200                     self.report_error('Postprocessing: %s' % str(err))
3201                     return
3202                 try:
3203                     for ph in self._post_hooks:
3204                         ph(info_dict['filepath'])
3205                 except Exception as err:
3206                     self.report_error('post hooks: %s' % str(err))
3207                     return
3208                 info_dict['__write_download_archive'] = True
3209
3210         if self.params.get('force_write_download_archive'):
3211             info_dict['__write_download_archive'] = True
3212
3213         # Make sure the info_dict was modified in-place
3214         assert info_dict is original_infodict
3215
3216         max_downloads = self.params.get('max_downloads')
3217         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3218             raise MaxDownloadsReached()
3219
3220     def __download_wrapper(self, func):
3221         @functools.wraps(func)
3222         def wrapper(*args, **kwargs):
3223             try:
3224                 res = func(*args, **kwargs)
3225             except UnavailableVideoError as e:
3226                 self.report_error(e)
3227             except MaxDownloadsReached as e:
3228                 self.to_screen(f'[info] {e}')
3229                 raise
3230             except DownloadCancelled as e:
3231                 self.to_screen(f'[info] {e}')
3232                 if not self.params.get('break_per_url'):
3233                     raise
3234             else:
3235                 if self.params.get('dump_single_json', False):
3236                     self.post_extract(res)
3237                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3238         return wrapper
3239
3240     def download(self, url_list):
3241         """Download a given list of URLs."""
3242         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3243         outtmpl = self.outtmpl_dict['default']
3244         if (len(url_list) > 1
3245                 and outtmpl != '-'
3246                 and '%' not in outtmpl
3247                 and self.params.get('max_downloads') != 1):
3248             raise SameFileError(outtmpl)
3249
3250         for url in url_list:
3251             self.__download_wrapper(self.extract_info)(
3252                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3253
3254         return self._download_retcode
3255
3256     def download_with_info_file(self, info_filename):
3257         with contextlib.closing(fileinput.FileInput(
3258                 [info_filename], mode='r',
3259                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3260             # FileInput doesn't have a read method, we can't call json.load
3261             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3262         try:
3263             self.__download_wrapper(self.process_ie_result)(info, download=True)
3264         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3265             if not isinstance(e, EntryNotInPlaylist):
3266                 self.to_stderr('\r')
3267             webpage_url = info.get('webpage_url')
3268             if webpage_url is not None:
3269                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3270                 return self.download([webpage_url])
3271             else:
3272                 raise
3273         return self._download_retcode
3274
3275     @staticmethod
3276     def sanitize_info(info_dict, remove_private_keys=False):
3277         ''' Sanitize the infodict for converting to json '''
3278         if info_dict is None:
3279             return info_dict
3280         info_dict.setdefault('epoch', int(time.time()))
3281         info_dict.setdefault('_type', 'video')
3282
3283         if remove_private_keys:
3284             reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in {
3285                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3286                 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3287             }
3288         else:
3289             reject = lambda k, v: False
3290
3291         def filter_fn(obj):
3292             if isinstance(obj, dict):
3293                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3294             elif isinstance(obj, (list, tuple, set, LazyList)):
3295                 return list(map(filter_fn, obj))
3296             elif obj is None or isinstance(obj, (str, int, float, bool)):
3297                 return obj
3298             else:
3299                 return repr(obj)
3300
3301         return filter_fn(info_dict)
3302
3303     @staticmethod
3304     def filter_requested_info(info_dict, actually_filter=True):
3305         ''' Alias of sanitize_info for backward compatibility '''
3306         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3307
3308     @staticmethod
3309     def post_extract(info_dict):
3310         def actual_post_extract(info_dict):
3311             if info_dict.get('_type') in ('playlist', 'multi_video'):
3312                 for video_dict in info_dict.get('entries', {}):
3313                     actual_post_extract(video_dict or {})
3314                 return
3315
3316             post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3317             info_dict.update(post_extractor())
3318
3319         actual_post_extract(info_dict or {})
3320
3321     def run_pp(self, pp, infodict):
3322         files_to_delete = []
3323         if '__files_to_move' not in infodict:
3324             infodict['__files_to_move'] = {}
3325         try:
3326             files_to_delete, infodict = pp.run(infodict)
3327         except PostProcessingError as e:
3328             # Must be True and not 'only_download'
3329             if self.params.get('ignoreerrors') is True:
3330                 self.report_error(e)
3331                 return infodict
3332             raise
3333
3334         if not files_to_delete:
3335             return infodict
3336         if self.params.get('keepvideo', False):
3337             for f in files_to_delete:
3338                 infodict['__files_to_move'].setdefault(f, '')
3339         else:
3340             for old_filename in set(files_to_delete):
3341                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3342                 try:
3343                     os.remove(encodeFilename(old_filename))
3344                 except (IOError, OSError):
3345                     self.report_warning('Unable to remove downloaded original file')
3346                 if old_filename in infodict['__files_to_move']:
3347                     del infodict['__files_to_move'][old_filename]
3348         return infodict
3349
3350     def run_all_pps(self, key, info, *, additional_pps=None):
3351         self._forceprint(key, info)
3352         for pp in (additional_pps or []) + self._pps[key]:
3353             info = self.run_pp(pp, info)
3354         return info
3355
3356     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3357         info = dict(ie_info)
3358         info['__files_to_move'] = files_to_move or {}
3359         info = self.run_all_pps(key, info)
3360         return info, info.pop('__files_to_move', None)
3361
3362     def post_process(self, filename, info, files_to_move=None):
3363         """Run all the postprocessors on the given file."""
3364         info['filepath'] = filename
3365         info['__files_to_move'] = files_to_move or {}
3366         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3367         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3368         del info['__files_to_move']
3369         return self.run_all_pps('after_move', info)
3370
3371     def _make_archive_id(self, info_dict):
3372         video_id = info_dict.get('id')
3373         if not video_id:
3374             return
3375         # Future-proof against any change in case
3376         # and backwards compatibility with prior versions
3377         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3378         if extractor is None:
3379             url = str_or_none(info_dict.get('url'))
3380             if not url:
3381                 return
3382             # Try to find matching extractor for the URL and take its ie_key
3383             for ie_key, ie in self._ies.items():
3384                 if ie.suitable(url):
3385                     extractor = ie_key
3386                     break
3387             else:
3388                 return
3389         return '%s %s' % (extractor.lower(), video_id)
3390
3391     def in_download_archive(self, info_dict):
3392         fn = self.params.get('download_archive')
3393         if fn is None:
3394             return False
3395
3396         vid_id = self._make_archive_id(info_dict)
3397         if not vid_id:
3398             return False  # Incomplete video information
3399
3400         return vid_id in self.archive
3401
3402     def record_download_archive(self, info_dict):
3403         fn = self.params.get('download_archive')
3404         if fn is None:
3405             return
3406         vid_id = self._make_archive_id(info_dict)
3407         assert vid_id
3408         self.write_debug(f'Adding to archive: {vid_id}')
3409         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3410             archive_file.write(vid_id + '\n')
3411         self.archive.add(vid_id)
3412
3413     @staticmethod
3414     def format_resolution(format, default='unknown'):
3415         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3416             return 'audio only'
3417         if format.get('resolution') is not None:
3418             return format['resolution']
3419         if format.get('width') and format.get('height'):
3420             return '%dx%d' % (format['width'], format['height'])
3421         elif format.get('height'):
3422             return '%sp' % format['height']
3423         elif format.get('width'):
3424             return '%dx?' % format['width']
3425         return default
3426
3427     def _list_format_headers(self, *headers):
3428         if self.params.get('listformats_table', True) is not False:
3429             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3430         return headers
3431
3432     def _format_note(self, fdict):
3433         res = ''
3434         if fdict.get('ext') in ['f4f', 'f4m']:
3435             res += '(unsupported)'
3436         if fdict.get('language'):
3437             if res:
3438                 res += ' '
3439             res += '[%s]' % fdict['language']
3440         if fdict.get('format_note') is not None:
3441             if res:
3442                 res += ' '
3443             res += fdict['format_note']
3444         if fdict.get('tbr') is not None:
3445             if res:
3446                 res += ', '
3447             res += '%4dk' % fdict['tbr']
3448         if fdict.get('container') is not None:
3449             if res:
3450                 res += ', '
3451             res += '%s container' % fdict['container']
3452         if (fdict.get('vcodec') is not None
3453                 and fdict.get('vcodec') != 'none'):
3454             if res:
3455                 res += ', '
3456             res += fdict['vcodec']
3457             if fdict.get('vbr') is not None:
3458                 res += '@'
3459         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3460             res += 'video@'
3461         if fdict.get('vbr') is not None:
3462             res += '%4dk' % fdict['vbr']
3463         if fdict.get('fps') is not None:
3464             if res:
3465                 res += ', '
3466             res += '%sfps' % fdict['fps']
3467         if fdict.get('acodec') is not None:
3468             if res:
3469                 res += ', '
3470             if fdict['acodec'] == 'none':
3471                 res += 'video only'
3472             else:
3473                 res += '%-5s' % fdict['acodec']
3474         elif fdict.get('abr') is not None:
3475             if res:
3476                 res += ', '
3477             res += 'audio'
3478         if fdict.get('abr') is not None:
3479             res += '@%3dk' % fdict['abr']
3480         if fdict.get('asr') is not None:
3481             res += ' (%5dHz)' % fdict['asr']
3482         if fdict.get('filesize') is not None:
3483             if res:
3484                 res += ', '
3485             res += format_bytes(fdict['filesize'])
3486         elif fdict.get('filesize_approx') is not None:
3487             if res:
3488                 res += ', '
3489             res += '~' + format_bytes(fdict['filesize_approx'])
3490         return res
3491
3492     def render_formats_table(self, info_dict):
3493         if not info_dict.get('formats') and not info_dict.get('url'):
3494             return None
3495
3496         formats = info_dict.get('formats', [info_dict])
3497         if not self.params.get('listformats_table', True) is not False:
3498             table = [
3499                 [
3500                     format_field(f, 'format_id'),
3501                     format_field(f, 'ext'),
3502                     self.format_resolution(f),
3503                     self._format_note(f)
3504                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3505             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3506
3507         delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3508         table = [
3509             [
3510                 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3511                 format_field(f, 'ext'),
3512                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3513                 format_field(f, 'fps', '\t%d'),
3514                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3515                 delim,
3516                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3517                 format_field(f, 'tbr', '\t%dk'),
3518                 shorten_protocol_name(f.get('protocol', '')),
3519                 delim,
3520                 format_field(f, 'vcodec', default='unknown').replace(
3521                     'none', 'images' if f.get('acodec') == 'none'
3522                             else self._format_screen('audio only', self.Styles.SUPPRESS)),
3523                 format_field(f, 'vbr', '\t%dk'),
3524                 format_field(f, 'acodec', default='unknown').replace(
3525                     'none', '' if f.get('vcodec') == 'none'
3526                             else self._format_screen('video only', self.Styles.SUPPRESS)),
3527                 format_field(f, 'abr', '\t%dk'),
3528                 format_field(f, 'asr', '\t%dHz'),
3529                 join_nonempty(
3530                     self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3531                     format_field(f, 'language', '[%s]'),
3532                     join_nonempty(format_field(f, 'format_note'),
3533                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3534                                   delim=', '),
3535                     delim=' '),
3536             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3537         header_line = self._list_format_headers(
3538             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3539             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3540
3541         return render_table(
3542             header_line, table, hide_empty=True,
3543             delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3544
3545     def render_thumbnails_table(self, info_dict):
3546         thumbnails = list(info_dict.get('thumbnails') or [])
3547         if not thumbnails:
3548             return None
3549         return render_table(
3550             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3551             [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3552
3553     def render_subtitles_table(self, video_id, subtitles):
3554         def _row(lang, formats):
3555             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3556             if len(set(names)) == 1:
3557                 names = [] if names[0] == 'unknown' else names[:1]
3558             return [lang, ', '.join(names), ', '.join(exts)]
3559
3560         if not subtitles:
3561             return None
3562         return render_table(
3563             self._list_format_headers('Language', 'Name', 'Formats'),
3564             [_row(lang, formats) for lang, formats in subtitles.items()],
3565             hide_empty=True)
3566
3567     def __list_table(self, video_id, name, func, *args):
3568         table = func(*args)
3569         if not table:
3570             self.to_screen(f'{video_id} has no {name}')
3571             return
3572         self.to_screen(f'[info] Available {name} for {video_id}:')
3573         self.to_stdout(table)
3574
3575     def list_formats(self, info_dict):
3576         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3577
3578     def list_thumbnails(self, info_dict):
3579         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3580
3581     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3582         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3583
3584     def urlopen(self, req):
3585         """ Start an HTTP download """
3586         if isinstance(req, compat_basestring):
3587             req = sanitized_Request(req)
3588         return self._opener.open(req, timeout=self._socket_timeout)
3589
3590     def print_debug_header(self):
3591         if not self.params.get('verbose'):
3592             return
3593
3594         def get_encoding(stream):
3595             ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3596             if not supports_terminal_sequences(stream):
3597                 from .compat import WINDOWS_VT_MODE
3598                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3599             return ret
3600
3601         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3602             locale.getpreferredencoding(),
3603             sys.getfilesystemencoding(),
3604             get_encoding(self._screen_file), get_encoding(self._err_file),
3605             self.get_encoding())
3606
3607         logger = self.params.get('logger')
3608         if logger:
3609             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3610             write_debug(encoding_str)
3611         else:
3612             write_string(f'[debug] {encoding_str}\n', encoding=None)
3613             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3614
3615         source = detect_variant()
3616         write_debug(join_nonempty(
3617             'yt-dlp version', __version__,
3618             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3619             '' if source == 'unknown' else f'({source})',
3620             delim=' '))
3621         if not _LAZY_LOADER:
3622             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3623                 write_debug('Lazy loading extractors is forcibly disabled')
3624             else:
3625                 write_debug('Lazy loading extractors is disabled')
3626         if plugin_extractors or plugin_postprocessors:
3627             write_debug('Plugins: %s' % [
3628                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3629                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3630         if self.params.get('compat_opts'):
3631             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3632
3633         if source == 'source':
3634             try:
3635                 sp = Popen(
3636                     ['git', 'rev-parse', '--short', 'HEAD'],
3637                     stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3638                     cwd=os.path.dirname(os.path.abspath(__file__)))
3639                 out, err = sp.communicate_or_kill()
3640                 out = out.decode().strip()
3641                 if re.match('[0-9a-f]+', out):
3642                     write_debug('Git HEAD: %s' % out)
3643             except Exception:
3644                 try:
3645                     sys.exc_clear()
3646                 except Exception:
3647                     pass
3648
3649         def python_implementation():
3650             impl_name = platform.python_implementation()
3651             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3652                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3653             return impl_name
3654
3655         write_debug('Python version %s (%s %s) - %s' % (
3656             platform.python_version(),
3657             python_implementation(),
3658             platform.architecture()[0],
3659             platform_name()))
3660
3661         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3662         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3663         if ffmpeg_features:
3664             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3665
3666         exe_versions['rtmpdump'] = rtmpdump_version()
3667         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3668         exe_str = ', '.join(
3669             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3670         ) or 'none'
3671         write_debug('exe versions: %s' % exe_str)
3672
3673         from .downloader.websocket import has_websockets
3674         from .postprocessor.embedthumbnail import has_mutagen
3675         from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
3676
3677         lib_str = join_nonempty(
3678             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3679             SECRETSTORAGE_AVAILABLE and 'secretstorage',
3680             has_mutagen and 'mutagen',
3681             SQLITE_AVAILABLE and 'sqlite',
3682             has_websockets and 'websockets',
3683             delim=', ') or 'none'
3684         write_debug('Optional libraries: %s' % lib_str)
3685
3686         proxy_map = {}
3687         for handler in self._opener.handlers:
3688             if hasattr(handler, 'proxies'):
3689                 proxy_map.update(handler.proxies)
3690         write_debug(f'Proxy map: {proxy_map}')
3691
3692         # Not implemented
3693         if False and self.params.get('call_home'):
3694             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3695             write_debug('Public IP address: %s' % ipaddr)
3696             latest_version = self.urlopen(
3697                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3698             if version_tuple(latest_version) > version_tuple(__version__):
3699                 self.report_warning(
3700                     'You are using an outdated version (newest version: %s)! '
3701                     'See https://yt-dl.org/update if you need help updating.' %
3702                     latest_version)
3703
3704     def _setup_opener(self):
3705         timeout_val = self.params.get('socket_timeout')
3706         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3707
3708         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3709         opts_cookiefile = self.params.get('cookiefile')
3710         opts_proxy = self.params.get('proxy')
3711
3712         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3713
3714         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3715         if opts_proxy is not None:
3716             if opts_proxy == '':
3717                 proxies = {}
3718             else:
3719                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3720         else:
3721             proxies = compat_urllib_request.getproxies()
3722             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3723             if 'http' in proxies and 'https' not in proxies:
3724                 proxies['https'] = proxies['http']
3725         proxy_handler = PerRequestProxyHandler(proxies)
3726
3727         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3728         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3729         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3730         redirect_handler = YoutubeDLRedirectHandler()
3731         data_handler = compat_urllib_request_DataHandler()
3732
3733         # When passing our own FileHandler instance, build_opener won't add the
3734         # default FileHandler and allows us to disable the file protocol, which
3735         # can be used for malicious purposes (see
3736         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3737         file_handler = compat_urllib_request.FileHandler()
3738
3739         def file_open(*args, **kwargs):
3740             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3741         file_handler.file_open = file_open
3742
3743         opener = compat_urllib_request.build_opener(
3744             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3745
3746         # Delete the default user-agent header, which would otherwise apply in
3747         # cases where our custom HTTP handler doesn't come into play
3748         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3749         opener.addheaders = []
3750         self._opener = opener
3751
3752     def encode(self, s):
3753         if isinstance(s, bytes):
3754             return s  # Already encoded
3755
3756         try:
3757             return s.encode(self.get_encoding())
3758         except UnicodeEncodeError as err:
3759             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3760             raise
3761
3762     def get_encoding(self):
3763         encoding = self.params.get('encoding')
3764         if encoding is None:
3765             encoding = preferredencoding()
3766         return encoding
3767
3768     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3769         ''' Write infojson and returns True = written, False = skip, None = error '''
3770         if overwrite is None:
3771             overwrite = self.params.get('overwrites', True)
3772         if not self.params.get('writeinfojson'):
3773             return False
3774         elif not infofn:
3775             self.write_debug(f'Skipping writing {label} infojson')
3776             return False
3777         elif not self._ensure_dir_exists(infofn):
3778             return None
3779         elif not overwrite and os.path.exists(infofn):
3780             self.to_screen(f'[info] {label.title()} metadata is already present')
3781         else:
3782             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3783             try:
3784                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3785             except (OSError, IOError):
3786                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3787                 return None
3788         return True
3789
3790     def _write_description(self, label, ie_result, descfn):
3791         ''' Write description and returns True = written, False = skip, None = error '''
3792         if not self.params.get('writedescription'):
3793             return False
3794         elif not descfn:
3795             self.write_debug(f'Skipping writing {label} description')
3796             return False
3797         elif not self._ensure_dir_exists(descfn):
3798             return None
3799         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3800             self.to_screen(f'[info] {label.title()} description is already present')
3801         elif ie_result.get('description') is None:
3802             self.report_warning(f'There\'s no {label} description to write')
3803             return False
3804         else:
3805             try:
3806                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3807                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3808                     descfile.write(ie_result['description'])
3809             except (OSError, IOError):
3810                 self.report_error(f'Cannot write {label} description file {descfn}')
3811                 return None
3812         return True
3813
3814     def _write_subtitles(self, info_dict, filename):
3815         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3816         ret = []
3817         subtitles = info_dict.get('requested_subtitles')
3818         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3819             # subtitles download errors are already managed as troubles in relevant IE
3820             # that way it will silently go on when used with unsupporting IE
3821             return ret
3822
3823         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3824         if not sub_filename_base:
3825             self.to_screen('[info] Skipping writing video subtitles')
3826             return ret
3827         for sub_lang, sub_info in subtitles.items():
3828             sub_format = sub_info['ext']
3829             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3830             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3831             existing_sub = self.existing_file((sub_filename_final, sub_filename))
3832             if existing_sub:
3833                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3834                 sub_info['filepath'] = existing_sub
3835                 ret.append((existing_sub, sub_filename_final))
3836                 continue
3837
3838             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3839             if sub_info.get('data') is not None:
3840                 try:
3841                     # Use newline='' to prevent conversion of newline characters
3842                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3843                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3844                         subfile.write(sub_info['data'])
3845                     sub_info['filepath'] = sub_filename
3846                     ret.append((sub_filename, sub_filename_final))
3847                     continue
3848                 except (OSError, IOError):
3849                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3850                     return None
3851
3852             try:
3853                 sub_copy = sub_info.copy()
3854                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3855                 self.dl(sub_filename, sub_copy, subtitle=True)
3856                 sub_info['filepath'] = sub_filename
3857                 ret.append((sub_filename, sub_filename_final))
3858             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3859                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3860                     raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
3861                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3862         return ret
3863
3864     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3865         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3866         write_all = self.params.get('write_all_thumbnails', False)
3867         thumbnails, ret = [], []
3868         if write_all or self.params.get('writethumbnail', False):
3869             thumbnails = info_dict.get('thumbnails') or []
3870         multiple = write_all and len(thumbnails) > 1
3871
3872         if thumb_filename_base is None:
3873             thumb_filename_base = filename
3874         if thumbnails and not thumb_filename_base:
3875             self.write_debug(f'Skipping writing {label} thumbnail')
3876             return ret
3877
3878         for idx, t in list(enumerate(thumbnails))[::-1]:
3879             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3880             thumb_display_id = f'{label} thumbnail {t["id"]}'
3881             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3882             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3883
3884             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3885             if existing_thumb:
3886                 self.to_screen('[info] %s is already present' % (
3887                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3888                 t['filepath'] = existing_thumb
3889                 ret.append((existing_thumb, thumb_filename_final))
3890             else:
3891                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3892                 try:
3893                     uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3894                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3895                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3896                         shutil.copyfileobj(uf, thumbf)
3897                     ret.append((thumb_filename, thumb_filename_final))
3898                     t['filepath'] = thumb_filename
3899                 except network_exceptions as err:
3900                     thumbnails.pop(idx)
3901                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3902             if ret and not write_all:
3903                 break
3904         return ret