yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_pycrypto_AES,
  40     compat_shlex_quote,
  41     compat_str,
  42     compat_tokenize_tokenize,
  43     compat_urllib_error,
  44     compat_urllib_request,
  45     compat_urllib_request_DataHandler,
  46     windows_enable_vt_mode,
  47 )
  48 from .cookies import load_cookies
  49 from .utils import (
  50     age_restricted,
  51     args_to_str,
  52     ContentTooShortError,
  53     date_from_str,
  54     DateRange,
  55     DEFAULT_OUTTMPL,
  56     determine_ext,
  57     determine_protocol,
  58     DownloadCancelled,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     format_decimal_suffix,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     get_domain,
  74     HEADRequest,
  75     InAdvancePagedList,
  76     int_or_none,
  77     iri_to_uri,
  78     ISO3166Utils,
  79     join_nonempty,
  80     LazyList,
  81     LINK_TEMPLATES,
  82     locked_file,
  83     make_dir,
  84     make_HTTPS_handler,
  85     MaxDownloadsReached,
  86     network_exceptions,
  87     number_of_digits,
  88     orderedSet,
  89     OUTTMPL_TYPES,
  90     PagedList,
  91     parse_filesize,
  92     PerRequestProxyHandler,
  93     platform_name,
  94     Popen,
  95     POSTPROCESS_WHEN,
  96     PostProcessingError,
  97     preferredencoding,
  98     prepend_extension,
  99     ReExtractInfo,
 100     register_socks_protocols,
 101     RejectedVideoReached,
 102     remove_terminal_sequences,
 103     render_table,
 104     replace_extension,
 105     SameFileError,
 106     sanitize_filename,
 107     sanitize_path,
 108     sanitize_url,
 109     sanitized_Request,
 110     std_headers,
 111     STR_FORMAT_RE_TMPL,
 112     STR_FORMAT_TYPES,
 113     str_or_none,
 114     strftime_or_none,
 115     subtitles_filename,
 116     supports_terminal_sequences,
 117     timetuple_from_msec,
 118     to_high_limit_path,
 119     traverse_obj,
 120     try_get,
 121     UnavailableVideoError,
 122     url_basename,
 123     variadic,
 124     version_tuple,
 125     write_json_file,
 126     write_string,
 127     YoutubeDLCookieProcessor,
 128     YoutubeDLHandler,
 129     YoutubeDLRedirectHandler,
 130 )
 131 from .cache import Cache
 132 from .minicurses import format_text
 133 from .extractor import (
 134     gen_extractor_classes,
 135     get_info_extractor,
 136     _LAZY_LOADER,
 137     _PLUGIN_CLASSES as plugin_extractors
 138 )
 139 from .extractor.openload import PhantomJSwrapper
 140 from .downloader import (
 141     FFmpegFD,
 142     get_suitable_downloader,
 143     shorten_protocol_name
 144 )
 145 from .downloader.rtmp import rtmpdump_version
 146 from .postprocessor import (
 147     get_postprocessor,
 148     EmbedThumbnailPP,
 149     FFmpegFixupDuplicateMoovPP,
 150     FFmpegFixupDurationPP,
 151     FFmpegFixupM3u8PP,
 152     FFmpegFixupM4aPP,
 153     FFmpegFixupStretchedPP,
 154     FFmpegFixupTimestampPP,
 155     FFmpegMergerPP,
 156     FFmpegPostProcessor,
 157     MoveFilesAfterDownloadPP,
 158     _PLUGIN_CLASSES as plugin_postprocessors
 159 )
 160 from .update import detect_variant
 161 from .version import __version__, RELEASE_GIT_HEAD
 162
 163 if compat_os_name == 'nt':
 164     import ctypes
 165
 166
 167 class YoutubeDL(object):
 168     """YoutubeDL class.
 169
 170     YoutubeDL objects are the ones responsible for downloading the
 171     actual video file and writing it to disk if the user has requested
 172     it, among some other tasks. In most cases there should be one per
 173     program. As, given a video URL, the downloader doesn't know how to
 174     extract all the needed information, task that InfoExtractors do, it
 175     has to pass the URL to one of them.
 176
 177     For this, YoutubeDL objects have a method that allows
 178     InfoExtractors to be registered in a given order. When it is passed
 179     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 180     finds that reports being able to handle it. The InfoExtractor extracts
 181     all the information about the video or videos the URL refers to, and
 182     YoutubeDL processes the extracted information, possibly using a File
 183     Downloader to download the video.
 184
 185     YoutubeDL objects accept a lot of parameters. In order not to saturate
 186     the object constructor with arguments, it receives a dictionary of
 187     options instead. These options are available through the params
 188     attribute for the InfoExtractors to use. The YoutubeDL also
 189     registers itself as the downloader in charge for the InfoExtractors
 190     that are added to it, so this is a "mutual registration".
 191
 192     Available options:
 193
 194     username:          Username for authentication purposes.
 195     password:          Password for authentication purposes.
 196     videopassword:     Password for accessing a video.
 197     ap_mso:            Adobe Pass multiple-system operator identifier.
 198     ap_username:       Multiple-system operator account username.
 199     ap_password:       Multiple-system operator account password.
 200     usenetrc:          Use netrc for authentication instead.
 201     verbose:           Print additional info to stdout.
 202     quiet:             Do not print messages to stdout.
 203     no_warnings:       Do not print out anything for warnings.
 204     forceprint:        A dict with keys video/playlist mapped to
 205                        a list of templates to force print to stdout
 206                        For compatibility, a single list is also accepted
 207     forceurl:          Force printing final URL. (Deprecated)
 208     forcetitle:        Force printing title. (Deprecated)
 209     forceid:           Force printing ID. (Deprecated)
 210     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 211     forcedescription:  Force printing description. (Deprecated)
 212     forcefilename:     Force printing final filename. (Deprecated)
 213     forceduration:     Force printing duration. (Deprecated)
 214     forcejson:         Force printing info_dict as JSON.
 215     dump_single_json:  Force printing the info_dict of the whole playlist
 216                        (or video) as a single JSON line.
 217     force_write_download_archive: Force writing download archive regardless
 218                        of 'skip_download' or 'simulate'.
 219     simulate:          Do not download the video files. If unset (or None),
 220                        simulate only if listsubtitles, listformats or list_thumbnails is used
 221     format:            Video format code. see "FORMAT SELECTION" for more details.
 222                        You can also pass a function. The function takes 'ctx' as
 223                        argument and returns the formats to download.
 224                        See "build_format_selector" for an implementation
 225     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 226     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 227                        extracting metadata even if the video is not actually
 228                        available for download (experimental)
 229     format_sort:       A list of fields by which to sort the video formats.
 230                        See "Sorting Formats" for more details.
 231     format_sort_force: Force the given format_sort. see "Sorting Formats"
 232                        for more details.
 233     allow_multiple_video_streams:   Allow multiple video streams to be merged
 234                        into a single file
 235     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 236                        into a single file
 237     check_formats:     Whether to test if the formats are downloadable.
 238                        Can be True (check all), False (check none),
 239                        'selected' (check selected formats),
 240                        or None (check only if requested by extractor)
 241     paths:             Dictionary of output paths. The allowed keys are 'home'
 242                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 243     outtmpl:           Dictionary of templates for output names. Allowed keys
 244                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 245                        For compatibility with youtube-dl, a single string can also be used
 246     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 247     restrictfilenames: Do not allow "&" and spaces in file names
 248     trim_file_name:    Limit length of filename (extension excluded)
 249     windowsfilenames:  Force the filenames to be windows compatible
 250     ignoreerrors:      Do not stop on download/postprocessing errors.
 251                        Can be 'only_download' to ignore only download errors.
 252                        Default is 'only_download' for CLI, but False for API
 253     skip_playlist_after_errors: Number of allowed failures until the rest of
 254                        the playlist is skipped
 255     force_generic_extractor: Force downloader to use the generic extractor
 256     overwrites:        Overwrite all video and metadata files if True,
 257                        overwrite only non-video files if None
 258                        and don't overwrite any file if False
 259                        For compatibility with youtube-dl,
 260                        "nooverwrites" may also be used instead
 261     playliststart:     Playlist item to start at.
 262     playlistend:       Playlist item to end at.
 263     playlist_items:    Specific indices of playlist to download.
 264     playlistreverse:   Download playlist items in reverse order.
 265     playlistrandom:    Download playlist items in random order.
 266     matchtitle:        Download only matching titles.
 267     rejecttitle:       Reject downloads for matching titles.
 268     logger:            Log messages to a logging.Logger instance.
 269     logtostderr:       Log messages to stderr instead of stdout.
 270     consoletitle:       Display progress in console window's titlebar.
 271     writedescription:  Write the video description to a .description file
 272     writeinfojson:     Write the video metadata to a .info.json file
 273     clean_infojson:    Remove private fields from the infojson
 274     getcomments:       Extract video comments. This will not be written to disk
 275                        unless writeinfojson is also given
 276     writeannotations:  Write the video annotations to a .annotations.xml file
 277     writethumbnail:    Write the thumbnail image to a file
 278     allow_playlist_files: Whether to write playlists' description, infojson etc
 279                        also to disk when using the 'write*' options
 280     write_all_thumbnails:  Write all thumbnail formats to files
 281     writelink:         Write an internet shortcut file, depending on the
 282                        current platform (.url/.webloc/.desktop)
 283     writeurllink:      Write a Windows internet shortcut file (.url)
 284     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 285     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 286     writesubtitles:    Write the video subtitles to a file
 287     writeautomaticsub: Write the automatically generated subtitles to a file
 288     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 289                        Downloads all the subtitles of the video
 290                        (requires writesubtitles or writeautomaticsub)
 291     listsubtitles:     Lists all available subtitles for the video
 292     subtitlesformat:   The format code for subtitles
 293     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 294                        The list may contain "all" to refer to all the available
 295                        subtitles. The language can be prefixed with a "-" to
 296                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 297     keepvideo:         Keep the video file after post-processing
 298     daterange:         A DateRange object, download only if the upload_date is in the range.
 299     skip_download:     Skip the actual download of the video file
 300     cachedir:          Location of the cache files in the filesystem.
 301                        False to disable filesystem cache.
 302     noplaylist:        Download single video instead of a playlist if in doubt.
 303     age_limit:         An integer representing the user's age in years.
 304                        Unsuitable videos for the given age are skipped.
 305     min_views:         An integer representing the minimum view count the video
 306                        must have in order to not be skipped.
 307                        Videos without view count information are always
 308                        downloaded. None for no limit.
 309     max_views:         An integer representing the maximum view count.
 310                        Videos that are more popular than that are not
 311                        downloaded.
 312                        Videos without view count information are always
 313                        downloaded. None for no limit.
 314     download_archive:  File name of a file where all downloads are recorded.
 315                        Videos already present in the file are not downloaded
 316                        again.
 317     break_on_existing: Stop the download process after attempting to download a
 318                        file that is in the archive.
 319     break_on_reject:   Stop the download process when encountering a video that
 320                        has been filtered out.
 321     break_per_url:     Whether break_on_reject and break_on_existing
 322                        should act on each input URL as opposed to for the entire queue
 323     cookiefile:        File name where cookies should be read from and dumped to
 324     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 325                        name/path from where cookies are loaded, and the name of the
 326                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 327     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 328                        support RFC 5746 secure renegotiation
 329     nocheckcertificate:  Do not verify SSL certificates
 330     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 331                        At the moment, this is only supported by YouTube.
 332     proxy:             URL of the proxy server to use
 333     geo_verification_proxy:  URL of the proxy to use for IP address verification
 334                        on geo-restricted sites.
 335     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 336     bidi_workaround:   Work around buggy terminals without bidirectional text
 337                        support, using fribidi
 338     debug_printtraffic:Print out sent and received HTTP traffic
 339     include_ads:       Download ads as well (deprecated)
 340     default_search:    Prepend this string if an input url is not valid.
 341                        'auto' for elaborate guessing
 342     encoding:          Use this encoding instead of the system-specified.
 343     extract_flat:      Do not resolve URLs, return the immediate result.
 344                        Pass in 'in_playlist' to only show this behavior for
 345                        playlist items.
 346     wait_for_video:    If given, wait for scheduled streams to become available.
 347                        The value should be a tuple containing the range
 348                        (min_secs, max_secs) to wait between retries
 349     postprocessors:    A list of dictionaries, each with an entry
 350                        * key:  The name of the postprocessor. See
 351                                yt_dlp/postprocessor/__init__.py for a list.
 352                        * when: When to run the postprocessor. Can be one of
 353                                pre_process|before_dl|post_process|after_move.
 354                                Assumed to be 'post_process' if not given
 355     post_hooks:        Deprecated - Register a custom postprocessor instead
 356                        A list of functions that get called as the final step
 357                        for each video file, after all postprocessors have been
 358                        called. The filename will be passed as the only argument.
 359     progress_hooks:    A list of functions that get called on download
 360                        progress, with a dictionary with the entries
 361                        * status: One of "downloading", "error", or "finished".
 362                                  Check this first and ignore unknown values.
 363                        * info_dict: The extracted info_dict
 364
 365                        If status is one of "downloading", or "finished", the
 366                        following properties may also be present:
 367                        * filename: The final filename (always present)
 368                        * tmpfilename: The filename we're currently writing to
 369                        * downloaded_bytes: Bytes on disk
 370                        * total_bytes: Size of the whole file, None if unknown
 371                        * total_bytes_estimate: Guess of the eventual file size,
 372                                                None if unavailable.
 373                        * elapsed: The number of seconds since download started.
 374                        * eta: The estimated time in seconds, None if unknown
 375                        * speed: The download speed in bytes/second, None if
 376                                 unknown
 377                        * fragment_index: The counter of the currently
 378                                          downloaded video fragment.
 379                        * fragment_count: The number of fragments (= individual
 380                                          files that will be merged)
 381
 382                        Progress hooks are guaranteed to be called at least once
 383                        (with status "finished") if the download is successful.
 384     postprocessor_hooks:  A list of functions that get called on postprocessing
 385                        progress, with a dictionary with the entries
 386                        * status: One of "started", "processing", or "finished".
 387                                  Check this first and ignore unknown values.
 388                        * postprocessor: Name of the postprocessor
 389                        * info_dict: The extracted info_dict
 390
 391                        Postprocessor hooks are guaranteed to be called at least twice
 392                        (with status "started" and "finished") if the processing is successful.
 393     merge_output_format: Extension to use when merging formats.
 394     final_ext:         Expected final extension; used to detect when the file was
 395                        already downloaded and converted
 396     fixup:             Automatically correct known faults of the file.
 397                        One of:
 398                        - "never": do nothing
 399                        - "warn": only emit a warning
 400                        - "detect_or_warn": check whether we can do anything
 401                                            about it, warn otherwise (default)
 402     source_address:    Client-side IP address to bind to.
 403     call_home:         Boolean, true iff we are allowed to contact the
 404                        yt-dlp servers for debugging. (BROKEN)
 405     sleep_interval_requests: Number of seconds to sleep between requests
 406                        during extraction
 407     sleep_interval:    Number of seconds to sleep before each download when
 408                        used alone or a lower bound of a range for randomized
 409                        sleep before each download (minimum possible number
 410                        of seconds to sleep) when used along with
 411                        max_sleep_interval.
 412     max_sleep_interval:Upper bound of a range for randomized sleep before each
 413                        download (maximum possible number of seconds to sleep).
 414                        Must only be used along with sleep_interval.
 415                        Actual sleep time will be a random float from range
 416                        [sleep_interval; max_sleep_interval].
 417     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 418     listformats:       Print an overview of available video formats and exit.
 419     list_thumbnails:   Print a table of all thumbnails and exit.
 420     match_filter:      A function that gets called with the info_dict of
 421                        every video.
 422                        If it returns a message, the video is ignored.
 423                        If it returns None, the video is downloaded.
 424                        match_filter_func in utils.py is one example for this.
 425     no_color:          Do not emit color codes in output.
 426     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 427                        HTTP header
 428     geo_bypass_country:
 429                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 430                        explicit geographic restriction bypassing via faking
 431                        X-Forwarded-For HTTP header
 432     geo_bypass_ip_block:
 433                        IP range in CIDR notation that will be used similarly to
 434                        geo_bypass_country
 435
 436     The following options determine which downloader is picked:
 437     external_downloader: A dictionary of protocol keys and the executable of the
 438                        external downloader to use for it. The allowed protocols
 439                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 440                        Set the value to 'native' to use the native downloader
 441     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 442                        or {'m3u8': 'ffmpeg'} instead.
 443                        Use the native HLS downloader instead of ffmpeg/avconv
 444                        if True, otherwise use ffmpeg/avconv if False, otherwise
 445                        use downloader suggested by extractor if None.
 446     compat_opts:       Compatibility options. See "Differences in default behavior".
 447                        The following options do not work when used through the API:
 448                        filename, abort-on-error, multistreams, no-live-chat, format-sort
 449                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 450                        Refer __init__.py for their implementation
 451     progress_template: Dictionary of templates for progress outputs.
 452                        Allowed keys are 'download', 'postprocess',
 453                        'download-title' (console title) and 'postprocess-title'.
 454                        The template is mapped on a dictionary with keys 'progress' and 'info'
 455
 456     The following parameters are not used by YoutubeDL itself, they are used by
 457     the downloader (see yt_dlp/downloader/common.py):
 458     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 459     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 460     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 461     external_downloader_args, concurrent_fragment_downloads.
 462
 463     The following options are used by the post processors:
 464     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 465                        otherwise prefer ffmpeg. (avconv support is deprecated)
 466     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 467                        to the binary or its containing directory.
 468     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 469                        and a list of additional command-line arguments for the
 470                        postprocessor/executable. The dict can also have "PP+EXE" keys
 471                        which are used when the given exe is used by the given PP.
 472                        Use 'default' as the name for arguments to be passed to all PP
 473                        For compatibility with youtube-dl, a single list of args
 474                        can also be used
 475
 476     The following options are used by the extractors:
 477     extractor_retries: Number of times to retry for known errors
 478     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 479     hls_split_discontinuity: Split HLS playlists to different formats at
 480                        discontinuities such as ad breaks (default: False)
 481     extractor_args:    A dictionary of arguments to be passed to the extractors.
 482                        See "EXTRACTOR ARGUMENTS" for details.
 483                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 484     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 485                        If True (default), DASH manifests and related
 486                        data will be downloaded and processed by extractor.
 487                        You can reduce network I/O by disabling it if you don't
 488                        care about DASH. (only for youtube)
 489     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 490                        If True (default), HLS manifests and related
 491                        data will be downloaded and processed by extractor.
 492                        You can reduce network I/O by disabling it if you don't
 493                        care about HLS. (only for youtube)
 494     """
 495
 496     _NUMERIC_FIELDS = set((
 497         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 498         'timestamp', 'release_timestamp',
 499         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 500         'average_rating', 'comment_count', 'age_limit',
 501         'start_time', 'end_time',
 502         'chapter_number', 'season_number', 'episode_number',
 503         'track_number', 'disc_number', 'release_year',
 504     ))
 505
 506     _format_selection_exts = {
 507         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 508         'video': {'mp4', 'flv', 'webm', '3gp'},
 509         'storyboards': {'mhtml'},
 510     }
 511
 512     params = None
 513     _ies = {}
 514     _pps = {k: [] for k in POSTPROCESS_WHEN}
 515     _printed_messages = set()
 516     _first_webpage_request = True
 517     _download_retcode = None
 518     _num_downloads = None
 519     _playlist_level = 0
 520     _playlist_urls = set()
 521     _screen_file = None
 522
 523     def __init__(self, params=None, auto_init=True):
 524         """Create a YoutubeDL object with the given options.
 525         @param auto_init    Whether to load the default extractors and print header (if verbose).
 526                             Set to 'no_verbose_header' to not print the header
 527         """
 528         if params is None:
 529             params = {}
 530         self._ies = {}
 531         self._ies_instances = {}
 532         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 533         self._printed_messages = set()
 534         self._first_webpage_request = True
 535         self._post_hooks = []
 536         self._progress_hooks = []
 537         self._postprocessor_hooks = []
 538         self._download_retcode = 0
 539         self._num_downloads = 0
 540         self._num_videos = 0
 541         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 542         self._err_file = sys.stderr
 543         self.params = params
 544         self.cache = Cache(self)
 545
 546         windows_enable_vt_mode()
 547         self._allow_colors = {
 548             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 549             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 550         }
 551
 552         if sys.version_info < (3, 6):
 553             self.report_warning(
 554                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 555
 556         if self.params.get('allow_unplayable_formats'):
 557             self.report_warning(
 558                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 559                 'This is a developer option intended for debugging. \n'
 560                 '         If you experience any issues while using this option, '
 561                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 562
 563         def check_deprecated(param, option, suggestion):
 564             if self.params.get(param) is not None:
 565                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 566                 return True
 567             return False
 568
 569         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 570             if self.params.get('geo_verification_proxy') is None:
 571                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 572
 573         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 574         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 575         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 576
 577         for msg in self.params.get('_warnings', []):
 578             self.report_warning(msg)
 579         for msg in self.params.get('_deprecation_warnings', []):
 580             self.deprecation_warning(msg)
 581
 582         if 'list-formats' in self.params.get('compat_opts', []):
 583             self.params['listformats_table'] = False
 584
 585         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 586             # nooverwrites was unnecessarily changed to overwrites
 587             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 588             # This ensures compatibility with both keys
 589             self.params['overwrites'] = not self.params['nooverwrites']
 590         elif self.params.get('overwrites') is None:
 591             self.params.pop('overwrites', None)
 592         else:
 593             self.params['nooverwrites'] = not self.params['overwrites']
 594
 595         # Compatibility with older syntax
 596         params.setdefault('forceprint', {})
 597         if not isinstance(params['forceprint'], dict):
 598             params['forceprint'] = {'video': params['forceprint']}
 599
 600         if params.get('bidi_workaround', False):
 601             try:
 602                 import pty
 603                 master, slave = pty.openpty()
 604                 width = compat_get_terminal_size().columns
 605                 if width is None:
 606                     width_args = []
 607                 else:
 608                     width_args = ['-w', str(width)]
 609                 sp_kwargs = dict(
 610                     stdin=subprocess.PIPE,
 611                     stdout=slave,
 612                     stderr=self._err_file)
 613                 try:
 614                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 615                 except OSError:
 616                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 617                 self._output_channel = os.fdopen(master, 'rb')
 618             except OSError as ose:
 619                 if ose.errno == errno.ENOENT:
 620                     self.report_warning(
 621                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 622                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 623                 else:
 624                     raise
 625
 626         if (sys.platform != 'win32'
 627                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 628                 and not params.get('restrictfilenames', False)):
 629             # Unicode filesystem API will throw errors (#1474, #13027)
 630             self.report_warning(
 631                 'Assuming --restrict-filenames since file system encoding '
 632                 'cannot encode all characters. '
 633                 'Set the LC_ALL environment variable to fix this.')
 634             self.params['restrictfilenames'] = True
 635
 636         self.outtmpl_dict = self.parse_outtmpl()
 637
 638         # Creating format selector here allows us to catch syntax errors before the extraction
 639         self.format_selector = (
 640             self.params.get('format') if self.params.get('format') in (None, '-')
 641             else self.params['format'] if callable(self.params['format'])
 642             else self.build_format_selector(self.params['format']))
 643
 644         self._setup_opener()
 645
 646         if auto_init:
 647             if auto_init != 'no_verbose_header':
 648                 self.print_debug_header()
 649             self.add_default_info_extractors()
 650
 651         hooks = {
 652             'post_hooks': self.add_post_hook,
 653             'progress_hooks': self.add_progress_hook,
 654             'postprocessor_hooks': self.add_postprocessor_hook,
 655         }
 656         for opt, fn in hooks.items():
 657             for ph in self.params.get(opt, []):
 658                 fn(ph)
 659
 660         for pp_def_raw in self.params.get('postprocessors', []):
 661             pp_def = dict(pp_def_raw)
 662             when = pp_def.pop('when', 'post_process')
 663             self.add_post_processor(
 664                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 665                 when=when)
 666
 667         register_socks_protocols()
 668
 669         def preload_download_archive(fn):
 670             """Preload the archive, if any is specified"""
 671             if fn is None:
 672                 return False
 673             self.write_debug(f'Loading archive file {fn!r}')
 674             try:
 675                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 676                     for line in archive_file:
 677                         self.archive.add(line.strip())
 678             except IOError as ioe:
 679                 if ioe.errno != errno.ENOENT:
 680                     raise
 681                 return False
 682             return True
 683
 684         self.archive = set()
 685         preload_download_archive(self.params.get('download_archive'))
 686
 687     def warn_if_short_id(self, argv):
 688         # short YouTube ID starting with dash?
 689         idxs = [
 690             i for i, a in enumerate(argv)
 691             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 692         if idxs:
 693             correct_argv = (
 694                 ['yt-dlp']
 695                 + [a for i, a in enumerate(argv) if i not in idxs]
 696                 + ['--'] + [argv[i] for i in idxs]
 697             )
 698             self.report_warning(
 699                 'Long argument string detected. '
 700                 'Use -- to separate parameters and URLs, like this:\n%s' %
 701                 args_to_str(correct_argv))
 702
 703     def add_info_extractor(self, ie):
 704         """Add an InfoExtractor object to the end of the list."""
 705         ie_key = ie.ie_key()
 706         self._ies[ie_key] = ie
 707         if not isinstance(ie, type):
 708             self._ies_instances[ie_key] = ie
 709             ie.set_downloader(self)
 710
 711     def _get_info_extractor_class(self, ie_key):
 712         ie = self._ies.get(ie_key)
 713         if ie is None:
 714             ie = get_info_extractor(ie_key)
 715             self.add_info_extractor(ie)
 716         return ie
 717
 718     def get_info_extractor(self, ie_key):
 719         """
 720         Get an instance of an IE with name ie_key, it will try to get one from
 721         the _ies list, if there's no instance it will create a new one and add
 722         it to the extractor list.
 723         """
 724         ie = self._ies_instances.get(ie_key)
 725         if ie is None:
 726             ie = get_info_extractor(ie_key)()
 727             self.add_info_extractor(ie)
 728         return ie
 729
 730     def add_default_info_extractors(self):
 731         """
 732         Add the InfoExtractors returned by gen_extractors to the end of the list
 733         """
 734         for ie in gen_extractor_classes():
 735             self.add_info_extractor(ie)
 736
 737     def add_post_processor(self, pp, when='post_process'):
 738         """Add a PostProcessor object to the end of the chain."""
 739         self._pps[when].append(pp)
 740         pp.set_downloader(self)
 741
 742     def add_post_hook(self, ph):
 743         """Add the post hook"""
 744         self._post_hooks.append(ph)
 745
 746     def add_progress_hook(self, ph):
 747         """Add the download progress hook"""
 748         self._progress_hooks.append(ph)
 749
 750     def add_postprocessor_hook(self, ph):
 751         """Add the postprocessing progress hook"""
 752         self._postprocessor_hooks.append(ph)
 753         for pps in self._pps.values():
 754             for pp in pps:
 755                 pp.add_progress_hook(ph)
 756
 757     def _bidi_workaround(self, message):
 758         if not hasattr(self, '_output_channel'):
 759             return message
 760
 761         assert hasattr(self, '_output_process')
 762         assert isinstance(message, compat_str)
 763         line_count = message.count('\n') + 1
 764         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 765         self._output_process.stdin.flush()
 766         res = ''.join(self._output_channel.readline().decode('utf-8')
 767                       for _ in range(line_count))
 768         return res[:-len('\n')]
 769
 770     def _write_string(self, message, out=None, only_once=False):
 771         if only_once:
 772             if message in self._printed_messages:
 773                 return
 774             self._printed_messages.add(message)
 775         write_string(message, out=out, encoding=self.params.get('encoding'))
 776
 777     def to_stdout(self, message, skip_eol=False, quiet=False):
 778         """Print message to stdout"""
 779         if self.params.get('logger'):
 780             self.params['logger'].debug(message)
 781         elif not quiet or self.params.get('verbose'):
 782             self._write_string(
 783                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 784                 self._err_file if quiet else self._screen_file)
 785
 786     def to_stderr(self, message, only_once=False):
 787         """Print message to stderr"""
 788         assert isinstance(message, compat_str)
 789         if self.params.get('logger'):
 790             self.params['logger'].error(message)
 791         else:
 792             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 793
 794     def to_console_title(self, message):
 795         if not self.params.get('consoletitle', False):
 796             return
 797         message = remove_terminal_sequences(message)
 798         if compat_os_name == 'nt':
 799             if ctypes.windll.kernel32.GetConsoleWindow():
 800                 # c_wchar_p() might not be necessary if `message` is
 801                 # already of type unicode()
 802                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 803         elif 'TERM' in os.environ:
 804             self._write_string('\033]0;%s\007' % message, self._screen_file)
 805
 806     def save_console_title(self):
 807         if not self.params.get('consoletitle', False):
 808             return
 809         if self.params.get('simulate'):
 810             return
 811         if compat_os_name != 'nt' and 'TERM' in os.environ:
 812             # Save the title on stack
 813             self._write_string('\033[22;0t', self._screen_file)
 814
 815     def restore_console_title(self):
 816         if not self.params.get('consoletitle', False):
 817             return
 818         if self.params.get('simulate'):
 819             return
 820         if compat_os_name != 'nt' and 'TERM' in os.environ:
 821             # Restore the title from stack
 822             self._write_string('\033[23;0t', self._screen_file)
 823
 824     def __enter__(self):
 825         self.save_console_title()
 826         return self
 827
 828     def __exit__(self, *args):
 829         self.restore_console_title()
 830
 831         if self.params.get('cookiefile') is not None:
 832             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 833
 834     def trouble(self, message=None, tb=None, is_error=True):
 835         """Determine action to take when a download problem appears.
 836
 837         Depending on if the downloader has been configured to ignore
 838         download errors or not, this method may throw an exception or
 839         not when errors are found, after printing the message.
 840
 841         @param tb          If given, is additional traceback information
 842         @param is_error    Whether to raise error according to ignorerrors
 843         """
 844         if message is not None:
 845             self.to_stderr(message)
 846         if self.params.get('verbose'):
 847             if tb is None:
 848                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 849                     tb = ''
 850                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 851                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 852                     tb += encode_compat_str(traceback.format_exc())
 853                 else:
 854                     tb_data = traceback.format_list(traceback.extract_stack())
 855                     tb = ''.join(tb_data)
 856             if tb:
 857                 self.to_stderr(tb)
 858         if not is_error:
 859             return
 860         if not self.params.get('ignoreerrors'):
 861             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 862                 exc_info = sys.exc_info()[1].exc_info
 863             else:
 864                 exc_info = sys.exc_info()
 865             raise DownloadError(message, exc_info)
 866         self._download_retcode = 1
 867
 868     def to_screen(self, message, skip_eol=False):
 869         """Print message to stdout if not in quiet mode"""
 870         self.to_stdout(
 871             message, skip_eol, quiet=self.params.get('quiet', False))
 872
    class Styles(Enum):
        # Named text styles. _format_text unwraps a member to its value
        # before handing it to format_text.
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        # Same value as HEADERS, so Enum treats WARNING as an alias of HEADERS
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 881
 882     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 883         if test_encoding:
 884             original_text = text
 885             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 886             text = text.encode(encoding, 'ignore').decode(encoding)
 887             if fallback is not None and text != original_text:
 888                 text = fallback
 889         if isinstance(f, self.Styles):
 890             f = f.value
 891         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 892
 893     def _format_screen(self, *args, **kwargs):
 894         return self._format_text(
 895             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 896
 897     def _format_err(self, *args, **kwargs):
 898         return self._format_text(
 899             self._err_file, self._allow_colors['err'], *args, **kwargs)
 900
 901     def report_warning(self, message, only_once=False):
 902         '''
 903         Print the message to stderr, it will be prefixed with 'WARNING:'
 904         If stderr is a tty file the 'WARNING:' will be colored
 905         '''
 906         if self.params.get('logger') is not None:
 907             self.params['logger'].warning(message)
 908         else:
 909             if self.params.get('no_warnings'):
 910                 return
 911             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 912
 913     def deprecation_warning(self, message):
 914         if self.params.get('logger') is not None:
 915             self.params['logger'].warning('DeprecationWarning: {message}')
 916         else:
 917             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 918
 919     def report_error(self, message, *args, **kwargs):
 920         '''
 921         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 922         in red if stderr is a tty file.
 923         '''
 924         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 925
 926     def write_debug(self, message, only_once=False):
 927         '''Log debug message or Print message to stderr'''
 928         if not self.params.get('verbose', False):
 929             return
 930         message = '[debug] %s' % message
 931         if self.params.get('logger'):
 932             self.params['logger'].debug(message)
 933         else:
 934             self.to_stderr(message, only_once)
 935
 936     def report_file_already_downloaded(self, file_name):
 937         """Report file has already been fully downloaded."""
 938         try:
 939             self.to_screen('[download] %s has already been downloaded' % file_name)
 940         except UnicodeEncodeError:
 941             self.to_screen('[download] The file has already been downloaded')
 942
 943     def report_file_delete(self, file_name):
 944         """Report that existing file will be deleted."""
 945         try:
 946             self.to_screen('Deleting existing file %s' % file_name)
 947         except UnicodeEncodeError:
 948             self.to_screen('Deleting existing file')
 949
 950     def raise_no_formats(self, info, forced=False):
 951         has_drm = info.get('__has_drm')
 952         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 953         expected = self.params.get('ignore_no_formats_error')
 954         if forced or not expected:
 955             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 956                                  expected=has_drm or expected)
 957         else:
 958             self.report_warning(msg)
 959
 960     def parse_outtmpl(self):
 961         outtmpl_dict = self.params.get('outtmpl', {})
 962         if not isinstance(outtmpl_dict, dict):
 963             outtmpl_dict = {'default': outtmpl_dict}
 964         # Remove spaces in the default template
 965         if self.params.get('restrictfilenames'):
 966             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 967         else:
 968             sanitize = lambda x: x
 969         outtmpl_dict.update({
 970             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 971             if outtmpl_dict.get(k) is None})
 972         for key, val in outtmpl_dict.items():
 973             if isinstance(val, bytes):
 974                 self.report_warning(
 975                     'Parameter outtmpl is bytes, but should be a unicode string. '
 976                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 977         return outtmpl_dict
 978
 979     def get_output_path(self, dir_type='', filename=None):
 980         paths = self.params.get('paths', {})
 981         assert isinstance(paths, dict)
 982         path = os.path.join(
 983             expand_path(paths.get('home', '').strip()),
 984             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 985             filename or '')
 986
 987         # Temporary fix for #4787
 988         # 'Treat' all problem characters by passing filename through preferredencoding
 989         # to workaround encoding issues with subprocess on python2 @ Windows
 990         if sys.version_info < (3, 0) and sys.platform == 'win32':
 991             path = encodeFilename(path, True).decode(preferredencoding())
 992         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 993
 994     @staticmethod
 995     def _outtmpl_expandpath(outtmpl):
 996         # expand_path translates '%%' into '%' and '$$' into '$'
 997         # correspondingly that is not what we want since we need to keep
 998         # '%%' intact for template dict substitution step. Working around
 999         # with boundary-alike separator hack.
1000         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1001         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
1002
1003         # outtmpl should be expand_path'ed before template dict substitution
1004         # because meta fields may contain env variables we don't want to
1005         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1006         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1007         return expand_path(outtmpl).replace(sep, '')
1008
1009     @staticmethod
1010     def escape_outtmpl(outtmpl):
1011         ''' Escape any remaining strings like %s, %abc% etc. '''
1012         return re.sub(
1013             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1014             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1015             outtmpl)
1016
1017     @classmethod
1018     def validate_outtmpl(cls, outtmpl):
1019         ''' @return None or Exception object '''
1020         outtmpl = re.sub(
1021             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1022             lambda mobj: f'{mobj.group(0)[:-1]}s',
1023             cls._outtmpl_expandpath(outtmpl))
1024         try:
1025             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1026             return None
1027         except ValueError as err:
1028             return err
1029
1030     @staticmethod
1031     def _copy_infodict(info_dict):
1032         info_dict = dict(info_dict)
1033         for key in ('__original_infodict', '__postprocessors'):
1034             info_dict.pop(key, None)
1035         return info_dict
1036
1037     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1038         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1039         @param sanitize    Whether to sanitize the output as a filename.
1040                            For backward compatibility, a function can also be passed
1041         """
1042
1043         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1044
1045         info_dict = self._copy_infodict(info_dict)
1046         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1047             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1048             if info_dict.get('duration', None) is not None
1049             else None)
1050         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
1051         info_dict['video_autonumber'] = self._num_videos
1052         if info_dict.get('resolution') is None:
1053             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1054
1055         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1056         # of %(field)s to %(field)0Nd for backward compatibility
1057         field_size_compat_map = {
1058             'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1059             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1060             'autonumber': self.params.get('autonumber_size') or 5,
1061         }
1062
1063         TMPL_DICT = {}
1064         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
1065         MATH_FUNCTIONS = {
1066             '+': float.__add__,
1067             '-': float.__sub__,
1068         }
1069         # Field is of the form key1.key2...
1070         # where keys (except first) can be string, int or slice
1071         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
1072         MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
1073         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1074         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1075             (?P<negate>-)?
1076             (?P<fields>{field})
1077             (?P<maths>(?:{math_op}{math_field})*)
1078             (?:>(?P<strf_format>.+?))?
1079             (?P<alternate>(?<!\\),[^|&)]+)?
1080             (?:&(?P<replacement>.*?))?
1081             (?:\|(?P<default>.*?))?
1082             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
1083
1084         def _traverse_infodict(k):
1085             k = k.split('.')
1086             if k[0] == '':
1087                 k.pop(0)
1088             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
1089
1090         def get_value(mdict):
1091             # Object traversal
1092             value = _traverse_infodict(mdict['fields'])
1093             # Negative
1094             if mdict['negate']:
1095                 value = float_or_none(value)
1096                 if value is not None:
1097                     value *= -1
1098             # Do maths
1099             offset_key = mdict['maths']
1100             if offset_key:
1101                 value = float_or_none(value)
1102                 operator = None
1103                 while offset_key:
1104                     item = re.match(
1105                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1106                         offset_key).group(0)
1107                     offset_key = offset_key[len(item):]
1108                     if operator is None:
1109                         operator = MATH_FUNCTIONS[item]
1110                         continue
1111                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1112                     offset = float_or_none(item)
1113                     if offset is None:
1114                         offset = float_or_none(_traverse_infodict(item))
1115                     try:
1116                         value = operator(value, multiplier * offset)
1117                     except (TypeError, ZeroDivisionError):
1118                         return None
1119                     operator = None
1120             # Datetime formatting
1121             if mdict['strf_format']:
1122                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1123
1124             return value
1125
1126         na = self.params.get('outtmpl_na_placeholder', 'NA')
1127
1128         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1129             return sanitize_filename(str(value), restricted=restricted,
1130                                      is_id=re.search(r'(^|[_.])id(\.|$)', key))
1131
1132         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1133         sanitize = bool(sanitize)
1134
1135         def _dumpjson_default(obj):
1136             if isinstance(obj, (set, LazyList)):
1137                 return list(obj)
1138             return repr(obj)
1139
1140         def create_key(outer_mobj):
1141             if not outer_mobj.group('has_key'):
1142                 return outer_mobj.group(0)
1143             key = outer_mobj.group('key')
1144             mobj = re.match(INTERNAL_FORMAT_RE, key)
1145             initial_field = mobj.group('fields') if mobj else ''
1146             value, replacement, default = None, None, na
1147             while mobj:
1148                 mobj = mobj.groupdict()
1149                 default = mobj['default'] if mobj['default'] is not None else default
1150                 value = get_value(mobj)
1151                 replacement = mobj['replacement']
1152                 if value is None and mobj['alternate']:
1153                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1154                 else:
1155                     break
1156
1157             fmt = outer_mobj.group('format')
1158             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1159                 fmt = '0{:d}d'.format(field_size_compat_map[key])
1160
1161             value = default if value is None else value if replacement is None else replacement
1162
1163             flags = outer_mobj.group('conversion') or ''
1164             str_fmt = f'{fmt[:-1]}s'
1165             if fmt[-1] == 'l':  # list
1166                 delim = '\n' if '#' in flags else ', '
1167                 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1168             elif fmt[-1] == 'j':  # json
1169                 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
1170             elif fmt[-1] == 'q':  # quoted
1171                 value = map(str, variadic(value) if '#' in flags else [value])
1172                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1173             elif fmt[-1] == 'B':  # bytes
1174                 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1175                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1176             elif fmt[-1] == 'U':  # unicode normalized
1177                 value, fmt = unicodedata.normalize(
1178                     # "+" = compatibility equivalence, "#" = NFD
1179                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1180                     value), str_fmt
1181             elif fmt[-1] == 'D':  # decimal suffix
1182                 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1183                 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1184                                               factor=1024 if '#' in flags else 1000)
1185             elif fmt[-1] == 'S':  # filename sanitization
1186                 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
1187             elif fmt[-1] == 'c':
1188                 if value:
1189                     value = str(value)[0]
1190                 else:
1191                     fmt = str_fmt
1192             elif fmt[-1] not in 'rs':  # numeric
1193                 value = float_or_none(value)
1194                 if value is None:
1195                     value, fmt = default, 's'
1196
1197             if sanitize:
1198                 if fmt[-1] == 'r':
1199                     # If value is an object, sanitize might convert it to a string
1200                     # So we convert it to repr first
1201                     value, fmt = repr(value), str_fmt
1202                 if fmt[-1] in 'csr':
1203                     value = sanitizer(initial_field, value)
1204
1205             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1206             TMPL_DICT[key] = value
1207             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1208
1209         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1210
1211     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1212         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1213         return self.escape_outtmpl(outtmpl) % info_dict
1214
1215     def _prepare_filename(self, info_dict, tmpl_type='default'):
1216         try:
1217             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1218             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1219
1220             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1221             if filename and force_ext is not None:
1222                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1223
1224             # https://github.com/blackjack4494/youtube-dlc/issues/85
1225             trim_file_name = self.params.get('trim_file_name', False)
1226             if trim_file_name:
1227                 no_ext, *ext = filename.rsplit('.', 2)
1228                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1229
1230             return filename
1231         except ValueError as err:
1232             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1233             return None
1234
1235     def prepare_filename(self, info_dict, dir_type='', warn=False):
1236         """Generate the output filename."""
1237
1238         filename = self._prepare_filename(info_dict, dir_type or 'default')
1239         if not filename and dir_type not in ('', 'temp'):
1240             return ''
1241
1242         if warn:
1243             if not self.params.get('paths'):
1244                 pass
1245             elif filename == '-':
1246                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1247             elif os.path.isabs(filename):
1248                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1249         if filename == '-' or not filename:
1250             return filename
1251
1252         return self.get_output_path(dir_type, filename)
1253
1254     def _match_entry(self, info_dict, incomplete=False, silent=False):
1255         """ Returns None if the file should be downloaded """
1256
1257         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1258
1259         def check_filter():
1260             if 'title' in info_dict:
1261                 # This can happen when we're just evaluating the playlist
1262                 title = info_dict['title']
1263                 matchtitle = self.params.get('matchtitle', False)
1264                 if matchtitle:
1265                     if not re.search(matchtitle, title, re.IGNORECASE):
1266                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1267                 rejecttitle = self.params.get('rejecttitle', False)
1268                 if rejecttitle:
1269                     if re.search(rejecttitle, title, re.IGNORECASE):
1270                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1271             date = info_dict.get('upload_date')
1272             if date is not None:
1273                 dateRange = self.params.get('daterange', DateRange())
1274                 if date not in dateRange:
1275                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1276             view_count = info_dict.get('view_count')
1277             if view_count is not None:
1278                 min_views = self.params.get('min_views')
1279                 if min_views is not None and view_count < min_views:
1280                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1281                 max_views = self.params.get('max_views')
1282                 if max_views is not None and view_count > max_views:
1283                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1284             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1285                 return 'Skipping "%s" because it is age restricted' % video_title
1286
1287             match_filter = self.params.get('match_filter')
1288             if match_filter is not None:
1289                 try:
1290                     ret = match_filter(info_dict, incomplete=incomplete)
1291                 except TypeError:
1292                     # For backward compatibility
1293                     ret = None if incomplete else match_filter(info_dict)
1294                 if ret is not None:
1295                     return ret
1296             return None
1297
1298         if self.in_download_archive(info_dict):
1299             reason = '%s has already been recorded in the archive' % video_title
1300             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1301         else:
1302             reason = check_filter()
1303             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1304         if reason is not None:
1305             if not silent:
1306                 self.to_screen('[download] ' + reason)
1307             if self.params.get(break_opt, False):
1308                 raise break_err()
1309         return reason
1310
1311     @staticmethod
1312     def add_extra_info(info_dict, extra_info):
1313         '''Set the keys from extra_info in info dict if they are missing'''
1314         for key, value in extra_info.items():
1315             info_dict.setdefault(key, value)
1316
1317     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1318                      process=True, force_generic_extractor=False):
1319         """
1320         Return a list with a dictionary for each video extracted.
1321
1322         Arguments:
1323         url -- URL to extract
1324
1325         Keyword arguments:
1326         download -- whether to download videos during extraction
1327         ie_key -- extractor key hint
1328         extra_info -- dictionary containing the extra values to add to each result
1329         process -- whether to resolve all unresolved references (URLs, playlist items),
1330             must be True for download to work.
1331         force_generic_extractor -- force using the generic extractor
1332         """
1333
1334         if extra_info is None:
1335             extra_info = {}
1336
1337         if not ie_key and force_generic_extractor:
1338             ie_key = 'Generic'
1339
1340         if ie_key:
1341             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1342         else:
1343             ies = self._ies
1344
1345         for ie_key, ie in ies.items():
1346             if not ie.suitable(url):
1347                 continue
1348
1349             if not ie.working():
1350                 self.report_warning('The program functionality for this site has been marked as broken, '
1351                                     'and will probably not work.')
1352
1353             temp_id = ie.get_temp_id(url)
1354             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1355                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1356                 if self.params.get('break_on_existing', False):
1357                     raise ExistingVideoReached()
1358                 break
1359             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1360         else:
1361             self.report_error('no suitable InfoExtractor for URL %s' % url)
1362
1363     def __handle_extraction_exceptions(func):
1364         @functools.wraps(func)
1365         def wrapper(self, *args, **kwargs):
1366             while True:
1367                 try:
1368                     return func(self, *args, **kwargs)
1369                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1370                     raise
1371                 except ReExtractInfo as e:
1372                     if e.expected:
1373                         self.to_screen(f'{e}; Re-extracting data')
1374                     else:
1375                         self.to_stderr('\r')
1376                         self.report_warning(f'{e}; Re-extracting data')
1377                     continue
1378                 except GeoRestrictedError as e:
1379                     msg = e.msg
1380                     if e.countries:
1381                         msg += '\nThis video is available in %s.' % ', '.join(
1382                             map(ISO3166Utils.short2full, e.countries))
1383                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1384                     self.report_error(msg)
1385                 except ExtractorError as e:  # An error we somewhat expected
1386                     self.report_error(str(e), e.format_traceback())
1387                 except Exception as e:
1388                     if self.params.get('ignoreerrors'):
1389                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1390                     else:
1391                         raise
1392                 break
1393         return wrapper
1394
1395     def _wait_for_video(self, ie_result):
1396         if (not self.params.get('wait_for_video')
1397                 or ie_result.get('_type', 'video') != 'video'
1398                 or ie_result.get('formats') or ie_result.get('url')):
1399             return
1400
1401         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1402         last_msg = ''
1403
1404         def progress(msg):
1405             nonlocal last_msg
1406             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1407             last_msg = msg
1408
1409         min_wait, max_wait = self.params.get('wait_for_video')
1410         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1411         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1412             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1413             self.report_warning('Release time of video is not known')
1414         elif (diff or 0) <= 0:
1415             self.report_warning('Video should already be available according to extracted info')
1416         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1417         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1418
1419         wait_till = time.time() + diff
1420         try:
1421             while True:
1422                 diff = wait_till - time.time()
1423                 if diff <= 0:
1424                     progress('')
1425                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1426                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1427                 time.sleep(1)
1428         except KeyboardInterrupt:
1429             progress('')
1430             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1431         except BaseException as e:
1432             if not isinstance(e, ReExtractInfo):
1433                 self.to_screen('')
1434             raise
1435
1436     @__handle_extraction_exceptions
1437     def __extract_info(self, url, ie, download, extra_info, process):
1438         ie_result = ie.extract(url)
1439         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1440             return
1441         if isinstance(ie_result, list):
1442             # Backwards compatibility: old IE result format
1443             ie_result = {
1444                 '_type': 'compat_list',
1445                 'entries': ie_result,
1446             }
1447         if extra_info.get('original_url'):
1448             ie_result.setdefault('original_url', extra_info['original_url'])
1449         self.add_default_extra_info(ie_result, ie, url)
1450         if process:
1451             self._wait_for_video(ie_result)
1452             return self.process_ie_result(ie_result, download, extra_info)
1453         else:
1454             return ie_result
1455
1456     def add_default_extra_info(self, ie_result, ie, url):
1457         if url is not None:
1458             self.add_extra_info(ie_result, {
1459                 'webpage_url': url,
1460                 'original_url': url,
1461                 'webpage_url_basename': url_basename(url),
1462                 'webpage_url_domain': get_domain(url),
1463             })
1464         if ie is not None:
1465             self.add_extra_info(ie_result, {
1466                 'extractor': ie.IE_NAME,
1467                 'extractor_key': ie.ie_key(),
1468             })
1469
1470     def process_ie_result(self, ie_result, download=True, extra_info=None):
1471         """
1472         Take the result of the ie(may be modified) and resolve all unresolved
1473         references (URLs, playlist items).
1474
1475         It will also download the videos if 'download'.
1476         Returns the resolved ie_result.
1477         """
1478         if extra_info is None:
1479             extra_info = {}
1480         result_type = ie_result.get('_type', 'video')
1481
1482         if result_type in ('url', 'url_transparent'):
1483             ie_result['url'] = sanitize_url(ie_result['url'])
1484             if ie_result.get('original_url'):
1485                 extra_info.setdefault('original_url', ie_result['original_url'])
1486
1487             extract_flat = self.params.get('extract_flat', False)
1488             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1489                     or extract_flat is True):
1490                 info_copy = ie_result.copy()
1491                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1492                 if ie and not ie_result.get('id'):
1493                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1494                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1495                 self.add_extra_info(info_copy, extra_info)
1496                 info_copy, _ = self.pre_process(info_copy)
1497                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1498                 if self.params.get('force_write_download_archive', False):
1499                     self.record_download_archive(info_copy)
1500                 return ie_result
1501
1502         if result_type == 'video':
1503             self.add_extra_info(ie_result, extra_info)
1504             ie_result = self.process_video_result(ie_result, download=download)
1505             additional_urls = (ie_result or {}).get('additional_urls')
1506             if additional_urls:
1507                 # TODO: Improve MetadataParserPP to allow setting a list
1508                 if isinstance(additional_urls, compat_str):
1509                     additional_urls = [additional_urls]
1510                 self.to_screen(
1511                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1512                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1513                 ie_result['additional_entries'] = [
1514                     self.extract_info(
1515                         url, download, extra_info=extra_info,
1516                         force_generic_extractor=self.params.get('force_generic_extractor'))
1517                     for url in additional_urls
1518                 ]
1519             return ie_result
1520         elif result_type == 'url':
1521             # We have to add extra_info to the results because it may be
1522             # contained in a playlist
1523             return self.extract_info(
1524                 ie_result['url'], download,
1525                 ie_key=ie_result.get('ie_key'),
1526                 extra_info=extra_info)
1527         elif result_type == 'url_transparent':
1528             # Use the information from the embedding page
1529             info = self.extract_info(
1530                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1531                 extra_info=extra_info, download=False, process=False)
1532
1533             # extract_info may return None when ignoreerrors is enabled and
1534             # extraction failed with an error, don't crash and return early
1535             # in this case
1536             if not info:
1537                 return info
1538
1539             force_properties = dict(
1540                 (k, v) for k, v in ie_result.items() if v is not None)
1541             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1542                 if f in force_properties:
1543                     del force_properties[f]
1544             new_result = info.copy()
1545             new_result.update(force_properties)
1546
1547             # Extracted info may not be a video result (i.e.
1548             # info.get('_type', 'video') != video) but rather an url or
1549             # url_transparent. In such cases outer metadata (from ie_result)
1550             # should be propagated to inner one (info). For this to happen
1551             # _type of info should be overridden with url_transparent. This
1552             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1553             if new_result.get('_type') == 'url':
1554                 new_result['_type'] = 'url_transparent'
1555
1556             return self.process_ie_result(
1557                 new_result, download=download, extra_info=extra_info)
1558         elif result_type in ('playlist', 'multi_video'):
1559             # Protect from infinite recursion due to recursively nested playlists
1560             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1561             webpage_url = ie_result['webpage_url']
1562             if webpage_url in self._playlist_urls:
1563                 self.to_screen(
1564                     '[download] Skipping already downloaded playlist: %s'
1565                     % ie_result.get('title') or ie_result.get('id'))
1566                 return
1567
1568             self._playlist_level += 1
1569             self._playlist_urls.add(webpage_url)
1570             self._sanitize_thumbnails(ie_result)
1571             try:
1572                 return self.__process_playlist(ie_result, download)
1573             finally:
1574                 self._playlist_level -= 1
1575                 if not self._playlist_level:
1576                     self._playlist_urls.clear()
1577         elif result_type == 'compat_list':
1578             self.report_warning(
1579                 'Extractor %s returned a compat_list result. '
1580                 'It needs to be updated.' % ie_result.get('extractor'))
1581
1582             def _fixup(r):
1583                 self.add_extra_info(r, {
1584                     'extractor': ie_result['extractor'],
1585                     'webpage_url': ie_result['webpage_url'],
1586                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1587                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1588                     'extractor_key': ie_result['extractor_key'],
1589                 })
1590                 return r
1591             ie_result['entries'] = [
1592                 self.process_ie_result(_fixup(r), download, extra_info)
1593                 for r in ie_result['entries']
1594             ]
1595             return ie_result
1596         else:
1597             raise Exception('Invalid result type: %s' % result_type)
1598
1599     def _ensure_dir_exists(self, path):
1600         return make_dir(path, self.report_error)
1601
1602     @staticmethod
1603     def _playlist_infodict(ie_result, **kwargs):
1604         return {
1605             **ie_result,
1606             'playlist': ie_result.get('title') or ie_result.get('id'),
1607             'playlist_id': ie_result.get('id'),
1608             'playlist_title': ie_result.get('title'),
1609             'playlist_uploader': ie_result.get('uploader'),
1610             'playlist_uploader_id': ie_result.get('uploader_id'),
1611             'playlist_index': 0,
1612             **kwargs,
1613         }
1614
    def __process_playlist(self, ie_result, download):
        """Process the requested entries of a playlist result.

        Selects the entries according to the 'playliststart', 'playlistend' and
        'playlist_items' params, optionally writes the playlist files, then
        processes every selected entry and stores the results in
        ie_result['entries'].

        Returns the value of run_all_pps('playlist', ie_result), or None when
        writing the playlist info json or description returned None.
        Raises EntryNotInPlaylist if ie_result has no 'entries' key, or if an
        entry is missing while ie_result carries 'requested_entries'.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Sentinel marking the positions for which no entry is available
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            def fill_missing_entries(entries, indices):
                # Put every entry at its 1-based index; all other positions hold MissingEntry
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                # Comma separated items, each a single index or an inclusive "start-end" range
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            playlist_count = len(ie_entries)
            # The remaining %d is filled in with the number of selected entries further below
            msg = f'Collected {playlist_count} videos; downloading %d of them'
            ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count

            def get_entry(i):
                return ie_entries[i - 1]
        else:
            msg = 'Downloading %d videos'
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)
            elif isinstance(ie_entries, InAdvancePagedList):
                if ie_entries._pagesize == 1:
                    # NOTE(review): this local is not read again within this method
                    playlist_count = ie_entries._pagecount

            def get_entry(i):
                # Accessing a lazy entry goes through the same exception handling as an extraction
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        # Collect the entries at the requested (1-based) indices
        entries, broken = [], False
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    # Without explicit items, running past the last entry ends the collection
                    break
            entries.append(entry)
            try:
                if entry is not None:
                    # The result is ignored; this only lets the break_on_* exceptions end the collection
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                broken = True
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        # The number of collected entries is only used as playlist_count when nothing limited the collection
        if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
            ie_result['playlist_count'] = n_entries

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            # A None result of the writers aborts the processing of this playlist
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            # With the 'playlist-index' compat option the index follows the
            # (possibly re-ordered) processing order instead of the saved one
            if 'playlist-index' in self.params.get('compat_opts', []):
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # Playlist-level values handed to the processing of the entry as extra_info
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_count': ie_result.get('playlist_count'),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # A non-None result is the reason why the entry is skipped
            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # A falsy result counts as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {playlist}')
        return ie_result
1790
1791     @__handle_extraction_exceptions
1792     def __process_iterable_entry(self, entry, download, extra_info):
1793         return self.process_ie_result(
1794             entry, download=download, extra_info=extra_info)
1795
1796     def _build_format_filter(self, filter_spec):
1797         " Returns a function to filter the formats according to the filter_spec "
1798
1799         OPERATORS = {
1800             '<': operator.lt,
1801             '<=': operator.le,
1802             '>': operator.gt,
1803             '>=': operator.ge,
1804             '=': operator.eq,
1805             '!=': operator.ne,
1806         }
1807         operator_rex = re.compile(r'''(?x)\s*
1808             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1809             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1810             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1811             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1812         m = operator_rex.fullmatch(filter_spec)
1813         if m:
1814             try:
1815                 comparison_value = int(m.group('value'))
1816             except ValueError:
1817                 comparison_value = parse_filesize(m.group('value'))
1818                 if comparison_value is None:
1819                     comparison_value = parse_filesize(m.group('value') + 'B')
1820                 if comparison_value is None:
1821                     raise ValueError(
1822                         'Invalid value %r in format specification %r' % (
1823                             m.group('value'), filter_spec))
1824             op = OPERATORS[m.group('op')]
1825
1826         if not m:
1827             STR_OPERATORS = {
1828                 '=': operator.eq,
1829                 '^=': lambda attr, value: attr.startswith(value),
1830                 '$=': lambda attr, value: attr.endswith(value),
1831                 '*=': lambda attr, value: value in attr,
1832             }
1833             str_operator_rex = re.compile(r'''(?x)\s*
1834                 (?P<key>[a-zA-Z0-9._-]+)\s*
1835                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1836                 (?P<value>[a-zA-Z0-9._-]+)\s*
1837                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1838             m = str_operator_rex.fullmatch(filter_spec)
1839             if m:
1840                 comparison_value = m.group('value')
1841                 str_op = STR_OPERATORS[m.group('op')]
1842                 if m.group('negation'):
1843                     op = lambda attr, value: not str_op(attr, value)
1844                 else:
1845                     op = str_op
1846
1847         if not m:
1848             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1849
1850         def _filter(f):
1851             actual_value = f.get(m.group('key'))
1852             if actual_value is None:
1853                 return m.group('none_inclusive')
1854             return op(actual_value, comparison_value)
1855         return _filter
1856
1857     def _check_formats(self, formats):
1858         for f in formats:
1859             self.to_screen('[info] Testing format %s' % f['format_id'])
1860             path = self.get_output_path('temp')
1861             if not self._ensure_dir_exists(f'{path}/'):
1862                 continue
1863             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1864             temp_file.close()
1865             try:
1866                 success, _ = self.dl(temp_file.name, f, test=True)
1867             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1868                 success = False
1869             finally:
1870                 if os.path.exists(temp_file.name):
1871                     try:
1872                         os.remove(temp_file.name)
1873                     except OSError:
1874                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1875             if success:
1876                 yield f
1877             else:
1878                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1879
1880     def _default_format_spec(self, info_dict, download=True):
1881
1882         def can_merge():
1883             merger = FFmpegMergerPP(self)
1884             return merger.available and merger.can_merge()
1885
1886         prefer_best = (
1887             not self.params.get('simulate')
1888             and download
1889             and (
1890                 not can_merge()
1891                 or info_dict.get('is_live', False)
1892                 or self.outtmpl_dict['default'] == '-'))
1893         compat = (
1894             prefer_best
1895             or self.params.get('allow_multiple_audio_streams', False)
1896             or 'format-spec' in self.params.get('compat_opts', []))
1897
1898         return (
1899             'best/bestvideo+bestaudio' if prefer_best
1900             else 'bestvideo*+bestaudio/best' if not compat
1901             else 'bestvideo+bestaudio/best')
1902
1903     def build_format_selector(self, format_spec):
1904         def syntax_error(note, start):
1905             message = (
1906                 'Invalid format specification: '
1907                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1908             return SyntaxError(message)
1909
1910         PICKFIRST = 'PICKFIRST'
1911         MERGE = 'MERGE'
1912         SINGLE = 'SINGLE'
1913         GROUP = 'GROUP'
1914         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1915
1916         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1917                                   'video': self.params.get('allow_multiple_video_streams', False)}
1918
1919         check_formats = self.params.get('check_formats') == 'selected'
1920
1921         def _parse_filter(tokens):
1922             filter_parts = []
1923             for type, string, start, _, _ in tokens:
1924                 if type == tokenize.OP and string == ']':
1925                     return ''.join(filter_parts)
1926                 else:
1927                     filter_parts.append(string)
1928
1929         def _remove_unused_ops(tokens):
1930             # Remove operators that we don't use and join them with the surrounding strings
1931             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1932             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1933             last_string, last_start, last_end, last_line = None, None, None, None
1934             for type, string, start, end, line in tokens:
1935                 if type == tokenize.OP and string == '[':
1936                     if last_string:
1937                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1938                         last_string = None
1939                     yield type, string, start, end, line
1940                     # everything inside brackets will be handled by _parse_filter
1941                     for type, string, start, end, line in tokens:
1942                         yield type, string, start, end, line
1943                         if type == tokenize.OP and string == ']':
1944                             break
1945                 elif type == tokenize.OP and string in ALLOWED_OPS:
1946                     if last_string:
1947                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1948                         last_string = None
1949                     yield type, string, start, end, line
1950                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1951                     if not last_string:
1952                         last_string = string
1953                         last_start = start
1954                         last_end = end
1955                     else:
1956                         last_string += string
1957             if last_string:
1958                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1959
1960         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1961             selectors = []
1962             current_selector = None
1963             for type, string, start, _, _ in tokens:
1964                 # ENCODING is only defined in python 3.x
1965                 if type == getattr(tokenize, 'ENCODING', None):
1966                     continue
1967                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1968                     current_selector = FormatSelector(SINGLE, string, [])
1969                 elif type == tokenize.OP:
1970                     if string == ')':
1971                         if not inside_group:
1972                             # ')' will be handled by the parentheses group
1973                             tokens.restore_last_token()
1974                         break
1975                     elif inside_merge and string in ['/', ',']:
1976                         tokens.restore_last_token()
1977                         break
1978                     elif inside_choice and string == ',':
1979                         tokens.restore_last_token()
1980                         break
1981                     elif string == ',':
1982                         if not current_selector:
1983                             raise syntax_error('"," must follow a format selector', start)
1984                         selectors.append(current_selector)
1985                         current_selector = None
1986                     elif string == '/':
1987                         if not current_selector:
1988                             raise syntax_error('"/" must follow a format selector', start)
1989                         first_choice = current_selector
1990                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1991                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1992                     elif string == '[':
1993                         if not current_selector:
1994                             current_selector = FormatSelector(SINGLE, 'best', [])
1995                         format_filter = _parse_filter(tokens)
1996                         current_selector.filters.append(format_filter)
1997                     elif string == '(':
1998                         if current_selector:
1999                             raise syntax_error('Unexpected "("', start)
2000                         group = _parse_format_selection(tokens, inside_group=True)
2001                         current_selector = FormatSelector(GROUP, group, [])
2002                     elif string == '+':
2003                         if not current_selector:
2004                             raise syntax_error('Unexpected "+"', start)
2005                         selector_1 = current_selector
2006                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2007                         if not selector_2:
2008                             raise syntax_error('Expected a selector', start)
2009                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2010                     else:
2011                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2012                 elif type == tokenize.ENDMARKER:
2013                     break
2014             if current_selector:
2015                 selectors.append(current_selector)
2016             return selectors
2017
2018         def _merge(formats_pair):
2019             format_1, format_2 = formats_pair
2020
2021             formats_info = []
2022             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2023             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2024
2025             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2026                 get_no_more = {'video': False, 'audio': False}
2027                 for (i, fmt_info) in enumerate(formats_info):
2028                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2029                         formats_info.pop(i)
2030                         continue
2031                     for aud_vid in ['audio', 'video']:
2032                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2033                             if get_no_more[aud_vid]:
2034                                 formats_info.pop(i)
2035                                 break
2036                             get_no_more[aud_vid] = True
2037
2038             if len(formats_info) == 1:
2039                 return formats_info[0]
2040
2041             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2042             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2043
2044             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2045             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2046
2047             output_ext = self.params.get('merge_output_format')
2048             if not output_ext:
2049                 if the_only_video:
2050                     output_ext = the_only_video['ext']
2051                 elif the_only_audio and not video_fmts:
2052                     output_ext = the_only_audio['ext']
2053                 else:
2054                     output_ext = 'mkv'
2055
2056             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2057
2058             new_dict = {
2059                 'requested_formats': formats_info,
2060                 'format': '+'.join(filtered('format')),
2061                 'format_id': '+'.join(filtered('format_id')),
2062                 'ext': output_ext,
2063                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2064                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2065                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2066                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2067                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2068             }
2069
2070             if the_only_video:
2071                 new_dict.update({
2072                     'width': the_only_video.get('width'),
2073                     'height': the_only_video.get('height'),
2074                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2075                     'fps': the_only_video.get('fps'),
2076                     'dynamic_range': the_only_video.get('dynamic_range'),
2077                     'vcodec': the_only_video.get('vcodec'),
2078                     'vbr': the_only_video.get('vbr'),
2079                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2080                 })
2081
2082             if the_only_audio:
2083                 new_dict.update({
2084                     'acodec': the_only_audio.get('acodec'),
2085                     'abr': the_only_audio.get('abr'),
2086                     'asr': the_only_audio.get('asr'),
2087                 })
2088
2089             return new_dict
2090
2091         def _check_formats(formats):
2092             if not check_formats:
2093                 yield from formats
2094                 return
2095             yield from self._check_formats(formats)
2096
2097         def _build_selector_function(selector):
2098             if isinstance(selector, list):  # ,
2099                 fs = [_build_selector_function(s) for s in selector]
2100
2101                 def selector_function(ctx):
2102                     for f in fs:
2103                         yield from f(ctx)
2104                 return selector_function
2105
2106             elif selector.type == GROUP:  # ()
2107                 selector_function = _build_selector_function(selector.selector)
2108
2109             elif selector.type == PICKFIRST:  # /
2110                 fs = [_build_selector_function(s) for s in selector.selector]
2111
2112                 def selector_function(ctx):
2113                     for f in fs:
2114                         picked_formats = list(f(ctx))
2115                         if picked_formats:
2116                             return picked_formats
2117                     return []
2118
2119             elif selector.type == MERGE:  # +
2120                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2121
2122                 def selector_function(ctx):
2123                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2124                         yield _merge(pair)
2125
2126             elif selector.type == SINGLE:  # atom
2127                 format_spec = selector.selector or 'best'
2128
2129                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2130                 if format_spec == 'all':
2131                     def selector_function(ctx):
2132                         yield from _check_formats(ctx['formats'][::-1])
2133                 elif format_spec == 'mergeall':
2134                     def selector_function(ctx):
2135                         formats = list(_check_formats(ctx['formats']))
2136                         if not formats:
2137                             return
2138                         merged_format = formats[-1]
2139                         for f in formats[-2::-1]:
2140                             merged_format = _merge((merged_format, f))
2141                         yield merged_format
2142
2143                 else:
2144                     format_fallback, format_reverse, format_idx = False, True, 1
2145                     mobj = re.match(
2146                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2147                         format_spec)
2148                     if mobj is not None:
2149                         format_idx = int_or_none(mobj.group('n'), default=1)
2150                         format_reverse = mobj.group('bw')[0] == 'b'
2151                         format_type = (mobj.group('type') or [None])[0]
2152                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2153                         format_modified = mobj.group('mod') is not None
2154
2155                         format_fallback = not format_type and not format_modified  # for b, w
2156                         _filter_f = (
2157                             (lambda f: f.get('%scodec' % format_type) != 'none')
2158                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2159                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2160                             if format_type  # bv, ba, wv, wa
2161                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2162                             if not format_modified  # b, w
2163                             else lambda f: True)  # b*, w*
2164                         filter_f = lambda f: _filter_f(f) and (
2165                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2166                     else:
2167                         if format_spec in self._format_selection_exts['audio']:
2168                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2169                         elif format_spec in self._format_selection_exts['video']:
2170                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2171                         elif format_spec in self._format_selection_exts['storyboards']:
2172                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2173                         else:
2174                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2175
2176                     def selector_function(ctx):
2177                         formats = list(ctx['formats'])
2178                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2179                         if format_fallback and ctx['incomplete_formats'] and not matches:
2180                             # for extractors with incomplete formats (audio only (soundcloud)
2181                             # or video only (imgur)) best/worst will fallback to
2182                             # best/worst {video,audio}-only format
2183                             matches = formats
2184                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2185                         try:
2186                             yield matches[format_idx - 1]
2187                         except IndexError:
2188                             return
2189
2190             filters = [self._build_format_filter(f) for f in selector.filters]
2191
2192             def final_selector(ctx):
2193                 ctx_copy = dict(ctx)
2194                 for _filter in filters:
2195                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2196                 return selector_function(ctx_copy)
2197             return final_selector
2198
2199         stream = io.BytesIO(format_spec.encode('utf-8'))
2200         try:
2201             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2202         except tokenize.TokenError:
2203             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2204
2205         class TokenIterator(object):
2206             def __init__(self, tokens):
2207                 self.tokens = tokens
2208                 self.counter = 0
2209
2210             def __iter__(self):
2211                 return self
2212
2213             def __next__(self):
2214                 if self.counter >= len(self.tokens):
2215                     raise StopIteration()
2216                 value = self.tokens[self.counter]
2217                 self.counter += 1
2218                 return value
2219
2220             next = __next__
2221
2222             def restore_last_token(self):
2223                 self.counter -= 1
2224
2225         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2226         return _build_selector_function(parsed_selector)
2227
2228     def _calc_headers(self, info_dict):
2229         res = std_headers.copy()
2230
2231         add_headers = info_dict.get('http_headers')
2232         if add_headers:
2233             res.update(add_headers)
2234
2235         cookies = self._calc_cookies(info_dict)
2236         if cookies:
2237             res['Cookie'] = cookies
2238
2239         if 'X-Forwarded-For' not in res:
2240             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2241             if x_forwarded_for_ip:
2242                 res['X-Forwarded-For'] = x_forwarded_for_ip
2243
2244         return res
2245
2246     def _calc_cookies(self, info_dict):
2247         pr = sanitized_Request(info_dict['url'])
2248         self.cookiejar.add_cookie_header(pr)
2249         return pr.get_header('Cookie')
2250
2251     def _sort_thumbnails(self, thumbnails):
2252         thumbnails.sort(key=lambda t: (
2253             t.get('preference') if t.get('preference') is not None else -1,
2254             t.get('width') if t.get('width') is not None else -1,
2255             t.get('height') if t.get('height') is not None else -1,
2256             t.get('id') if t.get('id') is not None else '',
2257             t.get('url')))
2258
2259     def _sanitize_thumbnails(self, info_dict):
2260         thumbnails = info_dict.get('thumbnails')
2261         if thumbnails is None:
2262             thumbnail = info_dict.get('thumbnail')
2263             if thumbnail:
2264                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2265         if not thumbnails:
2266             return
2267
2268         def check_thumbnails(thumbnails):
2269             for t in thumbnails:
2270                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2271                 try:
2272                     self.urlopen(HEADRequest(t['url']))
2273                 except network_exceptions as err:
2274                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2275                     continue
2276                 yield t
2277
2278         self._sort_thumbnails(thumbnails)
2279         for i, t in enumerate(thumbnails):
2280             if t.get('id') is None:
2281                 t['id'] = '%d' % i
2282             if t.get('width') and t.get('height'):
2283                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2284             t['url'] = sanitize_url(t['url'])
2285
2286         if self.params.get('check_formats') is True:
2287             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2288         else:
2289             info_dict['thumbnails'] = thumbnails
2290
2291     def process_video_result(self, info_dict, download=True):
2292         assert info_dict.get('_type', 'video') == 'video'
2293         self._num_videos += 1
2294
2295         if 'id' not in info_dict:
2296             raise ExtractorError('Missing "id" field in extractor result')
2297         if 'title' not in info_dict:
2298             raise ExtractorError('Missing "title" field in extractor result',
2299                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2300
2301         def report_force_conversion(field, field_not, conversion):
2302             self.report_warning(
2303                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2304                 % (field, field_not, conversion))
2305
2306         def sanitize_string_field(info, string_field):
2307             field = info.get(string_field)
2308             if field is None or isinstance(field, compat_str):
2309                 return
2310             report_force_conversion(string_field, 'a string', 'string')
2311             info[string_field] = compat_str(field)
2312
2313         def sanitize_numeric_fields(info):
2314             for numeric_field in self._NUMERIC_FIELDS:
2315                 field = info.get(numeric_field)
2316                 if field is None or isinstance(field, compat_numeric_types):
2317                     continue
2318                 report_force_conversion(numeric_field, 'numeric', 'int')
2319                 info[numeric_field] = int_or_none(field)
2320
2321         sanitize_string_field(info_dict, 'id')
2322         sanitize_numeric_fields(info_dict)
2323
2324         if 'playlist' not in info_dict:
2325             # It isn't part of a playlist
2326             info_dict['playlist'] = None
2327             info_dict['playlist_index'] = None
2328
2329         self._sanitize_thumbnails(info_dict)
2330
2331         thumbnail = info_dict.get('thumbnail')
2332         thumbnails = info_dict.get('thumbnails')
2333         if thumbnail:
2334             info_dict['thumbnail'] = sanitize_url(thumbnail)
2335         elif thumbnails:
2336             info_dict['thumbnail'] = thumbnails[-1]['url']
2337
2338         if info_dict.get('display_id') is None and 'id' in info_dict:
2339             info_dict['display_id'] = info_dict['id']
2340
2341         if info_dict.get('duration') is not None:
2342             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2343
2344         for ts_key, date_key in (
2345                 ('timestamp', 'upload_date'),
2346                 ('release_timestamp', 'release_date'),
2347                 ('modified_timestamp', 'modified_date'),
2348         ):
2349             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2350                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2351                 # see http://bugs.python.org/issue1646728)
2352                 try:
2353                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2354                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2355                 except (ValueError, OverflowError, OSError):
2356                     pass
2357
2358         live_keys = ('is_live', 'was_live')
2359         live_status = info_dict.get('live_status')
2360         if live_status is None:
2361             for key in live_keys:
2362                 if info_dict.get(key) is False:
2363                     continue
2364                 if info_dict.get(key):
2365                     live_status = key
2366                 break
2367             if all(info_dict.get(key) is False for key in live_keys):
2368                 live_status = 'not_live'
2369         if live_status:
2370             info_dict['live_status'] = live_status
2371             for key in live_keys:
2372                 if info_dict.get(key) is None:
2373                     info_dict[key] = (live_status == key)
2374
2375         # Auto generate title fields corresponding to the *_number fields when missing
2376         # in order to always have clean titles. This is very common for TV series.
2377         for field in ('chapter', 'season', 'episode'):
2378             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2379                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2380
2381         for cc_kind in ('subtitles', 'automatic_captions'):
2382             cc = info_dict.get(cc_kind)
2383             if cc:
2384                 for _, subtitle in cc.items():
2385                     for subtitle_format in subtitle:
2386                         if subtitle_format.get('url'):
2387                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2388                         if subtitle_format.get('ext') is None:
2389                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2390
2391         automatic_captions = info_dict.get('automatic_captions')
2392         subtitles = info_dict.get('subtitles')
2393
2394         info_dict['requested_subtitles'] = self.process_subtitles(
2395             info_dict['id'], subtitles, automatic_captions)
2396
2397         if info_dict.get('formats') is None:
2398             # There's only one format available
2399             formats = [info_dict]
2400         else:
2401             formats = info_dict['formats']
2402
2403         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2404         if not self.params.get('allow_unplayable_formats'):
2405             formats = [f for f in formats if not f.get('has_drm')]
2406
2407         # backward compatibility
2408         info_dict['fulltitle'] = info_dict['title']
2409
2410         if info_dict.get('is_live'):
2411             get_from_start = bool(self.params.get('live_from_start'))
2412             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2413             if not get_from_start:
2414                 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2415
2416         if not formats:
2417             self.raise_no_formats(info_dict)
2418
2419         def is_wellformed(f):
2420             url = f.get('url')
2421             if not url:
2422                 self.report_warning(
2423                     '"url" field is missing or empty - skipping format, '
2424                     'there is an error in extractor')
2425                 return False
2426             if isinstance(url, bytes):
2427                 sanitize_string_field(f, 'url')
2428             return True
2429
2430         # Filter out malformed formats for better extraction robustness
2431         formats = list(filter(is_wellformed, formats))
2432
2433         formats_dict = {}
2434
2435         # We check that all the formats have the format and format_id fields
2436         for i, format in enumerate(formats):
2437             sanitize_string_field(format, 'format_id')
2438             sanitize_numeric_fields(format)
2439             format['url'] = sanitize_url(format['url'])
2440             if not format.get('format_id'):
2441                 format['format_id'] = compat_str(i)
2442             else:
2443                 # Sanitize format_id from characters used in format selector expression
2444                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2445             format_id = format['format_id']
2446             if format_id not in formats_dict:
2447                 formats_dict[format_id] = []
2448             formats_dict[format_id].append(format)
2449
2450         # Make sure all formats have unique format_id
2451         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2452         for format_id, ambiguous_formats in formats_dict.items():
2453             ambigious_id = len(ambiguous_formats) > 1
2454             for i, format in enumerate(ambiguous_formats):
2455                 if ambigious_id:
2456                     format['format_id'] = '%s-%d' % (format_id, i)
2457                 if format.get('ext') is None:
2458                     format['ext'] = determine_ext(format['url']).lower()
2459                 # Ensure there is no conflict between id and ext in format selection
2460                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2461                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2462                     format['format_id'] = 'f%s' % format['format_id']
2463
2464         for i, format in enumerate(formats):
2465             if format.get('format') is None:
2466                 format['format'] = '{id} - {res}{note}'.format(
2467                     id=format['format_id'],
2468                     res=self.format_resolution(format),
2469                     note=format_field(format, 'format_note', ' (%s)'),
2470                 )
2471             if format.get('protocol') is None:
2472                 format['protocol'] = determine_protocol(format)
2473             if format.get('resolution') is None:
2474                 format['resolution'] = self.format_resolution(format, default=None)
2475             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2476                 format['dynamic_range'] = 'SDR'
2477             if (info_dict.get('duration') and format.get('tbr')
2478                     and not format.get('filesize') and not format.get('filesize_approx')):
2479                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2480
2481             # Add HTTP headers, so that external programs can use them from the
2482             # json output
2483             full_format_info = info_dict.copy()
2484             full_format_info.update(format)
2485             format['http_headers'] = self._calc_headers(full_format_info)
2486         # Remove private housekeeping stuff
2487         if '__x_forwarded_for_ip' in info_dict:
2488             del info_dict['__x_forwarded_for_ip']
2489
2490         # TODO Central sorting goes here
2491
2492         if self.params.get('check_formats') is True:
2493             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2494
2495         if not formats or formats[0] is not info_dict:
2496             # only set the 'formats' fields if the original info_dict list them
2497             # otherwise we end up with a circular reference, the first (and unique)
2498             # element in the 'formats' field in info_dict is info_dict itself,
2499             # which can't be exported to json
2500             info_dict['formats'] = formats
2501
2502         info_dict, _ = self.pre_process(info_dict)
2503
2504         # The pre-processors may have modified the formats
2505         formats = info_dict.get('formats', [info_dict])
2506
2507         list_only = self.params.get('simulate') is None and (
2508             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2509         interactive_format_selection = not list_only and self.format_selector == '-'
2510         if self.params.get('list_thumbnails'):
2511             self.list_thumbnails(info_dict)
2512         if self.params.get('listsubtitles'):
2513             if 'automatic_captions' in info_dict:
2514                 self.list_subtitles(
2515                     info_dict['id'], automatic_captions, 'automatic captions')
2516             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2517         if self.params.get('listformats') or interactive_format_selection:
2518             self.list_formats(info_dict)
2519         if list_only:
2520             # Without this printing, -F --print-json will not work
2521             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2522             return
2523
2524         format_selector = self.format_selector
2525         if format_selector is None:
2526             req_format = self._default_format_spec(info_dict, download=download)
2527             self.write_debug('Default format spec: %s' % req_format)
2528             format_selector = self.build_format_selector(req_format)
2529
2530         while True:
2531             if interactive_format_selection:
2532                 req_format = input(
2533                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2534                 try:
2535                     format_selector = self.build_format_selector(req_format)
2536                 except SyntaxError as err:
2537                     self.report_error(err, tb=False, is_error=False)
2538                     continue
2539
2540             # While in format selection we may need to have an access to the original
2541             # format set in order to calculate some metrics or do some processing.
2542             # For now we need to be able to guess whether original formats provided
2543             # by extractor are incomplete or not (i.e. whether extractor provides only
2544             # video-only or audio-only formats) for proper formats selection for
2545             # extractors with such incomplete formats (see
2546             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2547             # Since formats may be filtered during format selection and may not match
2548             # the original formats the results may be incorrect. Thus original formats
2549             # or pre-calculated metrics should be passed to format selection routines
2550             # as well.
2551             # We will pass a context object containing all necessary additional data
2552             # instead of just formats.
2553             # This fixes incorrect format selection issue (see
2554             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2555             incomplete_formats = (
2556                 # All formats are video-only or
2557                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2558                 # all formats are audio-only
2559                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2560
2561             ctx = {
2562                 'formats': formats,
2563                 'incomplete_formats': incomplete_formats,
2564             }
2565
2566             formats_to_download = list(format_selector(ctx))
2567             if interactive_format_selection and not formats_to_download:
2568                 self.report_error('Requested format is not available', tb=False, is_error=False)
2569                 continue
2570             break
2571
2572         if not formats_to_download:
2573             if not self.params.get('ignore_no_formats_error'):
2574                 raise ExtractorError('Requested format is not available', expected=True,
2575                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2576             self.report_warning('Requested format is not available')
2577             # Process what we can, even without any available formats.
2578             formats_to_download = [{}]
2579
2580         best_format = formats_to_download[-1]
2581         if download:
2582             if best_format:
2583                 self.to_screen(
2584                     f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2585                     + ', '.join([f['format_id'] for f in formats_to_download]))
2586             max_downloads_reached = False
2587             for i, fmt in enumerate(formats_to_download):
2588                 formats_to_download[i] = new_info = dict(info_dict)
2589                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2590                 new_info.update(fmt)
2591                 new_info['__original_infodict'] = info_dict
2592                 try:
2593                     self.process_info(new_info)
2594                 except MaxDownloadsReached:
2595                     max_downloads_reached = True
2596                 new_info.pop('__original_infodict')
2597                 # Remove copied info
2598                 for key, val in tuple(new_info.items()):
2599                     if info_dict.get(key) == val:
2600                         new_info.pop(key)
2601                 if max_downloads_reached:
2602                     break
2603
2604             write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2605             assert write_archive.issubset({True, False, 'ignore'})
2606             if True in write_archive and False not in write_archive:
2607                 self.record_download_archive(info_dict)
2608
2609             info_dict['requested_downloads'] = formats_to_download
2610             info_dict = self.run_all_pps('after_video', info_dict)
2611             if max_downloads_reached:
2612                 raise MaxDownloadsReached()
2613
2614         # We update the info dict with the selected best quality format (backwards compatibility)
2615         info_dict.update(best_format)
2616         return info_dict
2617
2618     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2619         """Select the requested subtitles and their format"""
2620         available_subs = {}
2621         if normal_subtitles and self.params.get('writesubtitles'):
2622             available_subs.update(normal_subtitles)
2623         if automatic_captions and self.params.get('writeautomaticsub'):
2624             for lang, cap_info in automatic_captions.items():
2625                 if lang not in available_subs:
2626                     available_subs[lang] = cap_info
2627
2628         if (not self.params.get('writesubtitles') and not
2629                 self.params.get('writeautomaticsub') or not
2630                 available_subs):
2631             return None
2632
2633         all_sub_langs = available_subs.keys()
2634         if self.params.get('allsubtitles', False):
2635             requested_langs = all_sub_langs
2636         elif self.params.get('subtitleslangs', False):
2637             # A list is used so that the order of languages will be the same as
2638             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2639             requested_langs = []
2640             for lang_re in self.params.get('subtitleslangs'):
2641                 if lang_re == 'all':
2642                     requested_langs.extend(all_sub_langs)
2643                     continue
2644                 discard = lang_re[0] == '-'
2645                 if discard:
2646                     lang_re = lang_re[1:]
2647                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2648                 if discard:
2649                     for lang in current_langs:
2650                         while lang in requested_langs:
2651                             requested_langs.remove(lang)
2652                 else:
2653                     requested_langs.extend(current_langs)
2654             requested_langs = orderedSet(requested_langs)
2655         elif 'en' in available_subs:
2656             requested_langs = ['en']
2657         else:
2658             requested_langs = [list(all_sub_langs)[0]]
2659         if requested_langs:
2660             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2661
2662         formats_query = self.params.get('subtitlesformat', 'best')
2663         formats_preference = formats_query.split('/') if formats_query else []
2664         subs = {}
2665         for lang in requested_langs:
2666             formats = available_subs.get(lang)
2667             if formats is None:
2668                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2669                 continue
2670             for ext in formats_preference:
2671                 if ext == 'best':
2672                     f = formats[-1]
2673                     break
2674                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2675                 if matches:
2676                     f = matches[-1]
2677                     break
2678             else:
2679                 f = formats[-1]
2680                 self.report_warning(
2681                     'No subtitle format found matching "%s" for language %s, '
2682                     'using %s' % (formats_query, lang, f['ext']))
2683             subs[lang] = f
2684         return subs
2685
2686     def _forceprint(self, tmpl, info_dict):
2687         mobj = re.match(r'\w+(=?)$', tmpl)
2688         if mobj and mobj.group(1):
2689             tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2690         elif mobj:
2691             tmpl = '%({})s'.format(tmpl)
2692
2693         info_dict = info_dict.copy()
2694         info_dict['formats_table'] = self.render_formats_table(info_dict)
2695         info_dict['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2696         info_dict['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2697         info_dict['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2698         self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2699
2700     def __forced_printings(self, info_dict, filename, incomplete):
2701         def print_mandatory(field, actual_field=None):
2702             if actual_field is None:
2703                 actual_field = field
2704             if (self.params.get('force%s' % field, False)
2705                     and (not incomplete or info_dict.get(actual_field) is not None)):
2706                 self.to_stdout(info_dict[actual_field])
2707
2708         def print_optional(field):
2709             if (self.params.get('force%s' % field, False)
2710                     and info_dict.get(field) is not None):
2711                 self.to_stdout(info_dict[field])
2712
2713         info_dict = info_dict.copy()
2714         if filename is not None:
2715             info_dict['filename'] = filename
2716         if info_dict.get('requested_formats') is not None:
2717             # For RTMP URLs, also include the playpath
2718             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2719         elif 'url' in info_dict:
2720             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2721
2722         if self.params['forceprint'].get('video') or self.params.get('forcejson'):
2723             self.post_extract(info_dict)
2724         for tmpl in self.params['forceprint'].get('video', []):
2725             self._forceprint(tmpl, info_dict)
2726
2727         print_mandatory('title')
2728         print_mandatory('id')
2729         print_mandatory('url', 'urls')
2730         print_optional('thumbnail')
2731         print_optional('description')
2732         print_optional('filename')
2733         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2734             self.to_stdout(formatSeconds(info_dict['duration']))
2735         print_mandatory('format')
2736
2737         if self.params.get('forcejson'):
2738             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2739
2740     def dl(self, name, info, subtitle=False, test=False):
2741         if not info.get('url'):
2742             self.raise_no_formats(info, True)
2743
2744         if test:
2745             verbose = self.params.get('verbose')
2746             params = {
2747                 'test': True,
2748                 'quiet': self.params.get('quiet') or not verbose,
2749                 'verbose': verbose,
2750                 'noprogress': not verbose,
2751                 'nopart': True,
2752                 'skip_unavailable_fragments': False,
2753                 'keep_fragments': False,
2754                 'overwrites': True,
2755                 '_no_ytdl_file': True,
2756             }
2757         else:
2758             params = self.params
2759         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2760         if not test:
2761             for ph in self._progress_hooks:
2762                 fd.add_progress_hook(ph)
2763             urls = '", "'.join(
2764                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2765                 for f in info.get('requested_formats', []) or [info])
2766             self.write_debug('Invoking downloader on "%s"' % urls)
2767
2768         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2769         # But it may contain objects that are not deep-copyable
2770         new_info = self._copy_infodict(info)
2771         if new_info.get('http_headers') is None:
2772             new_info['http_headers'] = self._calc_headers(new_info)
2773         return fd.download(name, new_info, subtitle)
2774
2775     def existing_file(self, filepaths, *, default_overwrite=True):
2776         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2777         if existing_files and not self.params.get('overwrites', default_overwrite):
2778             return existing_files[0]
2779
2780         for file in existing_files:
2781             self.report_file_delete(file)
2782             os.remove(file)
2783         return None
2784
2785     def process_info(self, info_dict):
2786         """Process a single resolved IE result. (Modified it in-place)"""
2787
2788         assert info_dict.get('_type', 'video') == 'video'
2789         original_infodict = info_dict
2790
2791         if 'format' not in info_dict and 'ext' in info_dict:
2792             info_dict['format'] = info_dict['ext']
2793
2794         if self._match_entry(info_dict) is not None:
2795             info_dict['__write_download_archive'] = 'ignore'
2796             return
2797
2798         self.post_extract(info_dict)
2799         self._num_downloads += 1
2800
2801         # info_dict['_filename'] needs to be set for backward compatibility
2802         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2803         temp_filename = self.prepare_filename(info_dict, 'temp')
2804         files_to_move = {}
2805
2806         # Forced printings
2807         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2808
2809         if self.params.get('simulate'):
2810             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2811             return
2812
2813         if full_filename is None:
2814             return
2815         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2816             return
2817         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2818             return
2819
2820         if self._write_description('video', info_dict,
2821                                    self.prepare_filename(info_dict, 'description')) is None:
2822             return
2823
2824         sub_files = self._write_subtitles(info_dict, temp_filename)
2825         if sub_files is None:
2826             return
2827         files_to_move.update(dict(sub_files))
2828
2829         thumb_files = self._write_thumbnails(
2830             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2831         if thumb_files is None:
2832             return
2833         files_to_move.update(dict(thumb_files))
2834
2835         infofn = self.prepare_filename(info_dict, 'infojson')
2836         _infojson_written = self._write_info_json('video', info_dict, infofn)
2837         if _infojson_written:
2838             info_dict['infojson_filename'] = infofn
2839             # For backward compatibility, even though it was a private field
2840             info_dict['__infojson_filename'] = infofn
2841         elif _infojson_written is None:
2842             return
2843
2844         # Note: Annotations are deprecated
2845         annofn = None
2846         if self.params.get('writeannotations', False):
2847             annofn = self.prepare_filename(info_dict, 'annotation')
2848         if annofn:
2849             if not self._ensure_dir_exists(encodeFilename(annofn)):
2850                 return
2851             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2852                 self.to_screen('[info] Video annotations are already present')
2853             elif not info_dict.get('annotations'):
2854                 self.report_warning('There are no annotations to write.')
2855             else:
2856                 try:
2857                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2858                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2859                         annofile.write(info_dict['annotations'])
2860                 except (KeyError, TypeError):
2861                     self.report_warning('There are no annotations to write.')
2862                 except (OSError, IOError):
2863                     self.report_error('Cannot write annotations file: ' + annofn)
2864                     return
2865
2866         # Write internet shortcut files
2867         def _write_link_file(link_type):
2868             if 'webpage_url' not in info_dict:
2869                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2870                 return False
2871             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2872             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2873                 return False
2874             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2875                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2876                 return True
2877             try:
2878                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2879                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2880                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2881                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2882                     if link_type == 'desktop':
2883                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2884                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2885             except (OSError, IOError):
2886                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2887                 return False
2888             return True
2889
2890         write_links = {
2891             'url': self.params.get('writeurllink'),
2892             'webloc': self.params.get('writewebloclink'),
2893             'desktop': self.params.get('writedesktoplink'),
2894         }
2895         if self.params.get('writelink'):
2896             link_type = ('webloc' if sys.platform == 'darwin'
2897                          else 'desktop' if sys.platform.startswith('linux')
2898                          else 'url')
2899             write_links[link_type] = True
2900
2901         if any(should_write and not _write_link_file(link_type)
2902                for link_type, should_write in write_links.items()):
2903             return
2904
2905         def replace_info_dict(new_info):
2906             nonlocal info_dict
2907             if new_info == info_dict:
2908                 return
2909             info_dict.clear()
2910             info_dict.update(new_info)
2911
2912         try:
2913             new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2914             replace_info_dict(new_info)
2915         except PostProcessingError as err:
2916             self.report_error('Preprocessing: %s' % str(err))
2917             return
2918
2919         if self.params.get('skip_download'):
2920             info_dict['filepath'] = temp_filename
2921             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2922             info_dict['__files_to_move'] = files_to_move
2923             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2924             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2925         else:
2926             # Download
2927             info_dict.setdefault('__postprocessors', [])
2928             try:
2929
2930                 def existing_video_file(*filepaths):
2931                     ext = info_dict.get('ext')
2932                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
2933                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
2934                                               default_overwrite=False)
2935                     if file:
2936                         info_dict['ext'] = os.path.splitext(file)[1][1:]
2937                     return file
2938
2939                 success = True
2940                 if info_dict.get('requested_formats') is not None:
2941
2942                     def compatible_formats(formats):
2943                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2944                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2945                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2946                         if len(video_formats) > 2 or len(audio_formats) > 2:
2947                             return False
2948
2949                         # Check extension
2950                         exts = set(format.get('ext') for format in formats)
2951                         COMPATIBLE_EXTS = (
2952                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2953                             set(('webm',)),
2954                         )
2955                         for ext_sets in COMPATIBLE_EXTS:
2956                             if ext_sets.issuperset(exts):
2957                                 return True
2958                         # TODO: Check acodec/vcodec
2959                         return False
2960
2961                     requested_formats = info_dict['requested_formats']
2962                     old_ext = info_dict['ext']
2963                     if self.params.get('merge_output_format') is None:
2964                         if not compatible_formats(requested_formats):
2965                             info_dict['ext'] = 'mkv'
2966                             self.report_warning(
2967                                 'Requested formats are incompatible for merge and will be merged into mkv')
2968                         if (info_dict['ext'] == 'webm'
2969                                 and info_dict.get('thumbnails')
2970                                 # check with type instead of pp_key, __name__, or isinstance
2971                                 # since we dont want any custom PPs to trigger this
2972                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2973                             info_dict['ext'] = 'mkv'
2974                             self.report_warning(
2975                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2976                     new_ext = info_dict['ext']
2977
2978                     def correct_ext(filename, ext=new_ext):
2979                         if filename == '-':
2980                             return filename
2981                         filename_real_ext = os.path.splitext(filename)[1][1:]
2982                         filename_wo_ext = (
2983                             os.path.splitext(filename)[0]
2984                             if filename_real_ext in (old_ext, new_ext)
2985                             else filename)
2986                         return '%s.%s' % (filename_wo_ext, ext)
2987
2988                     # Ensure filename always has a correct extension for successful merge
2989                     full_filename = correct_ext(full_filename)
2990                     temp_filename = correct_ext(temp_filename)
2991                     dl_filename = existing_video_file(full_filename, temp_filename)
2992                     info_dict['__real_download'] = False
2993
2994                     downloaded = []
2995                     merger = FFmpegMergerPP(self)
2996
2997                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
2998                     if dl_filename is not None:
2999                         self.report_file_already_downloaded(dl_filename)
3000                     elif fd:
3001                         for f in requested_formats if fd != FFmpegFD else []:
3002                             f['filepath'] = fname = prepend_extension(
3003                                 correct_ext(temp_filename, info_dict['ext']),
3004                                 'f%s' % f['format_id'], info_dict['ext'])
3005                             downloaded.append(fname)
3006                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3007                         success, real_download = self.dl(temp_filename, info_dict)
3008                         info_dict['__real_download'] = real_download
3009                     else:
3010                         if self.params.get('allow_unplayable_formats'):
3011                             self.report_warning(
3012                                 'You have requested merging of multiple formats '
3013                                 'while also allowing unplayable formats to be downloaded. '
3014                                 'The formats won\'t be merged to prevent data corruption.')
3015                         elif not merger.available:
3016                             self.report_warning(
3017                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
3018                                 'The formats won\'t be merged.')
3019
3020                         if temp_filename == '-':
3021                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3022                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3023                                       else 'but ffmpeg is not installed')
3024                             self.report_warning(
3025                                 f'You have requested downloading multiple formats to stdout {reason}. '
3026                                 'The formats will be streamed one after the other')
3027                             fname = temp_filename
3028                         for f in requested_formats:
3029                             new_info = dict(info_dict)
3030                             del new_info['requested_formats']
3031                             new_info.update(f)
3032                             if temp_filename != '-':
3033                                 fname = prepend_extension(
3034                                     correct_ext(temp_filename, new_info['ext']),
3035                                     'f%s' % f['format_id'], new_info['ext'])
3036                                 if not self._ensure_dir_exists(fname):
3037                                     return
3038                                 f['filepath'] = fname
3039                                 downloaded.append(fname)
3040                             partial_success, real_download = self.dl(fname, new_info)
3041                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3042                             success = success and partial_success
3043
3044                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3045                         info_dict['__postprocessors'].append(merger)
3046                         info_dict['__files_to_merge'] = downloaded
3047                         # Even if there were no downloads, it is being merged only now
3048                         info_dict['__real_download'] = True
3049                     else:
3050                         for file in downloaded:
3051                             files_to_move[file] = None
3052                 else:
3053                     # Just a single file
3054                     dl_filename = existing_video_file(full_filename, temp_filename)
3055                     if dl_filename is None or dl_filename == temp_filename:
3056                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3057                         # So we should try to resume the download
3058                         success, real_download = self.dl(temp_filename, info_dict)
3059                         info_dict['__real_download'] = real_download
3060                     else:
3061                         self.report_file_already_downloaded(dl_filename)
3062
3063                 dl_filename = dl_filename or temp_filename
3064                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3065
3066             except network_exceptions as err:
3067                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3068                 return
3069             except (OSError, IOError) as err:
3070                 raise UnavailableVideoError(err)
3071             except (ContentTooShortError, ) as err:
3072                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3073                 return
3074
3075             if success and full_filename != '-':
3076
3077                 def fixup():
3078                     do_fixup = True
3079                     fixup_policy = self.params.get('fixup')
3080                     vid = info_dict['id']
3081
3082                     if fixup_policy in ('ignore', 'never'):
3083                         return
3084                     elif fixup_policy == 'warn':
3085                         do_fixup = False
3086                     elif fixup_policy != 'force':
3087                         assert fixup_policy in ('detect_or_warn', None)
3088                         if not info_dict.get('__real_download'):
3089                             do_fixup = False
3090
3091                     def ffmpeg_fixup(cndn, msg, cls):
3092                         if not cndn:
3093                             return
3094                         if not do_fixup:
3095                             self.report_warning(f'{vid}: {msg}')
3096                             return
3097                         pp = cls(self)
3098                         if pp.available:
3099                             info_dict['__postprocessors'].append(pp)
3100                         else:
3101                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3102
3103                     stretched_ratio = info_dict.get('stretched_ratio')
3104                     ffmpeg_fixup(
3105                         stretched_ratio not in (1, None),
3106                         f'Non-uniform pixel ratio {stretched_ratio}',
3107                         FFmpegFixupStretchedPP)
3108
3109                     ffmpeg_fixup(
3110                         (info_dict.get('requested_formats') is None
3111                          and info_dict.get('container') == 'm4a_dash'
3112                          and info_dict.get('ext') == 'm4a'),
3113                         'writing DASH m4a. Only some players support this container',
3114                         FFmpegFixupM4aPP)
3115
3116                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3117                     downloader = downloader.__name__ if downloader else None
3118
3119                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3120                         ffmpeg_fixup(downloader == 'HlsFD',
3121                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3122                                      FFmpegFixupM3u8PP)
3123                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3124                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3125
3126                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3127                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3128
3129                 fixup()
3130                 try:
3131                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3132                 except PostProcessingError as err:
3133                     self.report_error('Postprocessing: %s' % str(err))
3134                     return
3135                 try:
3136                     for ph in self._post_hooks:
3137                         ph(info_dict['filepath'])
3138                 except Exception as err:
3139                     self.report_error('post hooks: %s' % str(err))
3140                     return
3141                 info_dict['__write_download_archive'] = True
3142
3143         if self.params.get('force_write_download_archive'):
3144             info_dict['__write_download_archive'] = True
3145
3146         # Make sure the info_dict was modified in-place
3147         assert info_dict is original_infodict
3148
3149         max_downloads = self.params.get('max_downloads')
3150         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3151             raise MaxDownloadsReached()
3152
3153     def __download_wrapper(self, func):
3154         @functools.wraps(func)
3155         def wrapper(*args, **kwargs):
3156             try:
3157                 res = func(*args, **kwargs)
3158             except UnavailableVideoError as e:
3159                 self.report_error(e)
3160             except MaxDownloadsReached as e:
3161                 self.to_screen(f'[info] {e}')
3162                 raise
3163             except DownloadCancelled as e:
3164                 self.to_screen(f'[info] {e}')
3165                 if not self.params.get('break_per_url'):
3166                     raise
3167             else:
3168                 if self.params.get('dump_single_json', False):
3169                     self.post_extract(res)
3170                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3171         return wrapper
3172
3173     def download(self, url_list):
3174         """Download a given list of URLs."""
3175         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3176         outtmpl = self.outtmpl_dict['default']
3177         if (len(url_list) > 1
3178                 and outtmpl != '-'
3179                 and '%' not in outtmpl
3180                 and self.params.get('max_downloads') != 1):
3181             raise SameFileError(outtmpl)
3182
3183         for url in url_list:
3184             self.__download_wrapper(self.extract_info)(
3185                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3186
3187         return self._download_retcode
3188
3189     def download_with_info_file(self, info_filename):
3190         with contextlib.closing(fileinput.FileInput(
3191                 [info_filename], mode='r',
3192                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3193             # FileInput doesn't have a read method, we can't call json.load
3194             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3195         try:
3196             self.__download_wrapper(self.process_ie_result)(info, download=True)
3197         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3198             if not isinstance(e, EntryNotInPlaylist):
3199                 self.to_stderr('\r')
3200             webpage_url = info.get('webpage_url')
3201             if webpage_url is not None:
3202                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3203                 return self.download([webpage_url])
3204             else:
3205                 raise
3206         return self._download_retcode
3207
3208     @staticmethod
3209     def sanitize_info(info_dict, remove_private_keys=False):
3210         ''' Sanitize the infodict for converting to json '''
3211         if info_dict is None:
3212             return info_dict
3213         info_dict.setdefault('epoch', int(time.time()))
3214         info_dict.setdefault('_type', 'video')
3215         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3216         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3217         if remove_private_keys:
3218             remove_keys |= {
3219                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3220                 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3221             }
3222             reject = lambda k, v: k not in keep_keys and (
3223                 k.startswith('_') or k in remove_keys or v is None)
3224         else:
3225             reject = lambda k, v: k in remove_keys
3226
3227         def filter_fn(obj):
3228             if isinstance(obj, dict):
3229                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3230             elif isinstance(obj, (list, tuple, set, LazyList)):
3231                 return list(map(filter_fn, obj))
3232             elif obj is None or isinstance(obj, (str, int, float, bool)):
3233                 return obj
3234             else:
3235                 return repr(obj)
3236
3237         return filter_fn(info_dict)
3238
3239     @staticmethod
3240     def filter_requested_info(info_dict, actually_filter=True):
3241         ''' Alias of sanitize_info for backward compatibility '''
3242         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3243
3244     @staticmethod
3245     def post_extract(info_dict):
3246         def actual_post_extract(info_dict):
3247             if info_dict.get('_type') in ('playlist', 'multi_video'):
3248                 for video_dict in info_dict.get('entries', {}):
3249                     actual_post_extract(video_dict or {})
3250                 return
3251
3252             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3253             extra = post_extractor().items()
3254             info_dict.update(extra)
3255             info_dict.pop('__post_extractor', None)
3256
3257             original_infodict = info_dict.get('__original_infodict') or {}
3258             original_infodict.update(extra)
3259             original_infodict.pop('__post_extractor', None)
3260
3261         actual_post_extract(info_dict or {})
3262
3263     def run_pp(self, pp, infodict):
3264         files_to_delete = []
3265         if '__files_to_move' not in infodict:
3266             infodict['__files_to_move'] = {}
3267         try:
3268             files_to_delete, infodict = pp.run(infodict)
3269         except PostProcessingError as e:
3270             # Must be True and not 'only_download'
3271             if self.params.get('ignoreerrors') is True:
3272                 self.report_error(e)
3273                 return infodict
3274             raise
3275
3276         if not files_to_delete:
3277             return infodict
3278         if self.params.get('keepvideo', False):
3279             for f in files_to_delete:
3280                 infodict['__files_to_move'].setdefault(f, '')
3281         else:
3282             for old_filename in set(files_to_delete):
3283                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3284                 try:
3285                     os.remove(encodeFilename(old_filename))
3286                 except (IOError, OSError):
3287                     self.report_warning('Unable to remove downloaded original file')
3288                 if old_filename in infodict['__files_to_move']:
3289                     del infodict['__files_to_move'][old_filename]
3290         return infodict
3291
3292     def run_all_pps(self, key, info, *, additional_pps=None):
3293         for tmpl in self.params['forceprint'].get(key, []):
3294             self._forceprint(tmpl, info)
3295         for pp in (additional_pps or []) + self._pps[key]:
3296             info = self.run_pp(pp, info)
3297         return info
3298
3299     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3300         info = dict(ie_info)
3301         info['__files_to_move'] = files_to_move or {}
3302         info = self.run_all_pps(key, info)
3303         return info, info.pop('__files_to_move', None)
3304
3305     def post_process(self, filename, info, files_to_move=None):
3306         """Run all the postprocessors on the given file."""
3307         info['filepath'] = filename
3308         info['__files_to_move'] = files_to_move or {}
3309         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3310         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3311         del info['__files_to_move']
3312         return self.run_all_pps('after_move', info)
3313
3314     def _make_archive_id(self, info_dict):
3315         video_id = info_dict.get('id')
3316         if not video_id:
3317             return
3318         # Future-proof against any change in case
3319         # and backwards compatibility with prior versions
3320         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3321         if extractor is None:
3322             url = str_or_none(info_dict.get('url'))
3323             if not url:
3324                 return
3325             # Try to find matching extractor for the URL and take its ie_key
3326             for ie_key, ie in self._ies.items():
3327                 if ie.suitable(url):
3328                     extractor = ie_key
3329                     break
3330             else:
3331                 return
3332         return '%s %s' % (extractor.lower(), video_id)
3333
3334     def in_download_archive(self, info_dict):
3335         fn = self.params.get('download_archive')
3336         if fn is None:
3337             return False
3338
3339         vid_id = self._make_archive_id(info_dict)
3340         if not vid_id:
3341             return False  # Incomplete video information
3342
3343         return vid_id in self.archive
3344
3345     def record_download_archive(self, info_dict):
3346         fn = self.params.get('download_archive')
3347         if fn is None:
3348             return
3349         vid_id = self._make_archive_id(info_dict)
3350         assert vid_id
3351         self.write_debug(f'Adding to archive: {vid_id}')
3352         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3353             archive_file.write(vid_id + '\n')
3354         self.archive.add(vid_id)
3355
3356     @staticmethod
3357     def format_resolution(format, default='unknown'):
3358         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3359             return 'audio only'
3360         if format.get('resolution') is not None:
3361             return format['resolution']
3362         if format.get('width') and format.get('height'):
3363             return '%dx%d' % (format['width'], format['height'])
3364         elif format.get('height'):
3365             return '%sp' % format['height']
3366         elif format.get('width'):
3367             return '%dx?' % format['width']
3368         return default
3369
3370     def _list_format_headers(self, *headers):
3371         if self.params.get('listformats_table', True) is not False:
3372             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3373         return headers
3374
3375     def _format_note(self, fdict):
3376         res = ''
3377         if fdict.get('ext') in ['f4f', 'f4m']:
3378             res += '(unsupported)'
3379         if fdict.get('language'):
3380             if res:
3381                 res += ' '
3382             res += '[%s]' % fdict['language']
3383         if fdict.get('format_note') is not None:
3384             if res:
3385                 res += ' '
3386             res += fdict['format_note']
3387         if fdict.get('tbr') is not None:
3388             if res:
3389                 res += ', '
3390             res += '%4dk' % fdict['tbr']
3391         if fdict.get('container') is not None:
3392             if res:
3393                 res += ', '
3394             res += '%s container' % fdict['container']
3395         if (fdict.get('vcodec') is not None
3396                 and fdict.get('vcodec') != 'none'):
3397             if res:
3398                 res += ', '
3399             res += fdict['vcodec']
3400             if fdict.get('vbr') is not None:
3401                 res += '@'
3402         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3403             res += 'video@'
3404         if fdict.get('vbr') is not None:
3405             res += '%4dk' % fdict['vbr']
3406         if fdict.get('fps') is not None:
3407             if res:
3408                 res += ', '
3409             res += '%sfps' % fdict['fps']
3410         if fdict.get('acodec') is not None:
3411             if res:
3412                 res += ', '
3413             if fdict['acodec'] == 'none':
3414                 res += 'video only'
3415             else:
3416                 res += '%-5s' % fdict['acodec']
3417         elif fdict.get('abr') is not None:
3418             if res:
3419                 res += ', '
3420             res += 'audio'
3421         if fdict.get('abr') is not None:
3422             res += '@%3dk' % fdict['abr']
3423         if fdict.get('asr') is not None:
3424             res += ' (%5dHz)' % fdict['asr']
3425         if fdict.get('filesize') is not None:
3426             if res:
3427                 res += ', '
3428             res += format_bytes(fdict['filesize'])
3429         elif fdict.get('filesize_approx') is not None:
3430             if res:
3431                 res += ', '
3432             res += '~' + format_bytes(fdict['filesize_approx'])
3433         return res
3434
3435     def render_formats_table(self, info_dict):
3436         if not info_dict.get('formats') and not info_dict.get('url'):
3437             return None
3438
3439         formats = info_dict.get('formats', [info_dict])
3440         if not self.params.get('listformats_table', True) is not False:
3441             table = [
3442                 [
3443                     format_field(f, 'format_id'),
3444                     format_field(f, 'ext'),
3445                     self.format_resolution(f),
3446                     self._format_note(f)
3447                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3448             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3449
3450         delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3451         table = [
3452             [
3453                 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3454                 format_field(f, 'ext'),
3455                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3456                 format_field(f, 'fps', '\t%d'),
3457                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3458                 delim,
3459                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3460                 format_field(f, 'tbr', '\t%dk'),
3461                 shorten_protocol_name(f.get('protocol', '')),
3462                 delim,
3463                 format_field(f, 'vcodec', default='unknown').replace(
3464                     'none', 'images' if f.get('acodec') == 'none'
3465                             else self._format_screen('audio only', self.Styles.SUPPRESS)),
3466                 format_field(f, 'vbr', '\t%dk'),
3467                 format_field(f, 'acodec', default='unknown').replace(
3468                     'none', '' if f.get('vcodec') == 'none'
3469                             else self._format_screen('video only', self.Styles.SUPPRESS)),
3470                 format_field(f, 'abr', '\t%dk'),
3471                 format_field(f, 'asr', '\t%dHz'),
3472                 join_nonempty(
3473                     self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3474                     format_field(f, 'language', '[%s]'),
3475                     join_nonempty(format_field(f, 'format_note'),
3476                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3477                                   delim=', '),
3478                     delim=' '),
3479             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3480         header_line = self._list_format_headers(
3481             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3482             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3483
3484         return render_table(
3485             header_line, table, hide_empty=True,
3486             delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3487
3488     def render_thumbnails_table(self, info_dict):
3489         thumbnails = list(info_dict.get('thumbnails'))
3490         if not thumbnails:
3491             return None
3492         return render_table(
3493             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3494             [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3495
3496     def render_subtitles_table(self, video_id, subtitles):
3497         def _row(lang, formats):
3498             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3499             if len(set(names)) == 1:
3500                 names = [] if names[0] == 'unknown' else names[:1]
3501             return [lang, ', '.join(names), ', '.join(exts)]
3502
3503         if not subtitles:
3504             return None
3505         return render_table(
3506             self._list_format_headers('Language', 'Name', 'Formats'),
3507             [_row(lang, formats) for lang, formats in subtitles.items()],
3508             hide_empty=True)
3509
3510     def __list_table(self, video_id, name, func, *args):
3511         table = func(*args)
3512         if not table:
3513             self.to_screen(f'{video_id} has no {name}')
3514             return
3515         self.to_screen(f'[info] Available {name} for {video_id}:')
3516         self.to_stdout(table)
3517
3518     def list_formats(self, info_dict):
3519         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3520
3521     def list_thumbnails(self, info_dict):
3522         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3523
3524     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3525         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3526
3527     def urlopen(self, req):
3528         """ Start an HTTP download """
3529         if isinstance(req, compat_basestring):
3530             req = sanitized_Request(req)
3531         return self._opener.open(req, timeout=self._socket_timeout)
3532
    def print_debug_header(self):
        """Write the verbose-mode debug header; does nothing unless the 'verbose' param is set.

        The header lists, in this order: encodings, yt-dlp version and
        variant, lazy-extractor status, plugins, compatibility options, the
        git HEAD (for source installs), Python version, external executable
        versions, optional libraries and the proxy map.
        Lines go to the 'logger' param's debug() when a logger is configured,
        otherwise they are written with write_string/self._write_string.
        """
        if not self.params.get('verbose'):
            return

        def get_encoding(stream):
            # Describe the stream's encoding; streams without an 'encoding'
            # attribute are reported as missing, and streams for which
            # supports_terminal_sequences() is false get a suffix
            ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
            if not supports_terminal_sequences(stream):
                from .compat import WINDOWS_VT_MODE
                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
            return ret

        encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            get_encoding(self._screen_file), get_encoding(self._err_file),
            self.get_encoding())

        # Pick the output channel once; every later line goes through write_debug
        logger = self.params.get('logger')
        if logger:
            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
            write_debug(encoding_str)
        else:
            # The encodings line is written with encoding=None, unlike the later lines
            write_string(f'[debug] {encoding_str}\n', encoding=None)
            write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

        source = detect_variant()
        write_debug(join_nonempty(
            'yt-dlp version', __version__,
            f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
            '' if source == 'unknown' else f'({source})',
            delim=' '))
        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')
            else:
                write_debug('Lazy loading extractors is disabled')
        if plugin_extractors or plugin_postprocessors:
            # A plugin registered under a name other than its class name is shown as '<class> as <name>'
            write_debug('Plugins: %s' % [
                '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
        if self.params.get('compat_opts'):
            write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

        if source == 'source':
            # Best effort: any failure while asking git for the HEAD is ignored
            try:
                sp = Popen(
                    ['git', 'rev-parse', '--short', 'HEAD'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                    cwd=os.path.dirname(os.path.abspath(__file__)))
                out, err = sp.communicate_or_kill()
                out = out.decode().strip()
                if re.match('[0-9a-f]+', out):
                    write_debug('Git HEAD: %s' % out)
            except Exception:
                # Failure of sys.exc_clear() itself is ignored as well
                try:
                    sys.exc_clear()
                except Exception:
                    pass

        def python_implementation():
            # Implementation name, with the PyPy version appended when running on PyPy
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        write_debug('Python version %s (%s %s) - %s' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
        # Keep only the names of features whose value is truthy
        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
        if ffmpeg_features:
            exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)

        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        # Executables with a falsy version are left out; 'none' when nothing remains
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

        from .downloader.websocket import has_websockets
        from .postprocessor.embedthumbnail import has_mutagen
        from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE

        # Each entry is either a library name or a falsy value
        lib_str = join_nonempty(
            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
            SECRETSTORAGE_AVAILABLE and 'secretstorage',
            has_mutagen and 'mutagen',
            SQLITE_AVAILABLE and 'sqlite',
            has_websockets and 'websockets',
            delim=', ') or 'none'
        write_debug('Optional libraries: %s' % lib_str)

        # Merge the 'proxies' mapping of every opener handler that has one
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        # Not implemented
        # (the 'False and' below makes this branch unreachable)
        if False and self.params.get('call_home'):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            write_debug('Public IP address: %s' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3646
3647     def _setup_opener(self):
3648         timeout_val = self.params.get('socket_timeout')
3649         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3650
3651         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3652         opts_cookiefile = self.params.get('cookiefile')
3653         opts_proxy = self.params.get('proxy')
3654
3655         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3656
3657         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3658         if opts_proxy is not None:
3659             if opts_proxy == '':
3660                 proxies = {}
3661             else:
3662                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3663         else:
3664             proxies = compat_urllib_request.getproxies()
3665             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3666             if 'http' in proxies and 'https' not in proxies:
3667                 proxies['https'] = proxies['http']
3668         proxy_handler = PerRequestProxyHandler(proxies)
3669
3670         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3671         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3672         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3673         redirect_handler = YoutubeDLRedirectHandler()
3674         data_handler = compat_urllib_request_DataHandler()
3675
3676         # When passing our own FileHandler instance, build_opener won't add the
3677         # default FileHandler and allows us to disable the file protocol, which
3678         # can be used for malicious purposes (see
3679         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3680         file_handler = compat_urllib_request.FileHandler()
3681
3682         def file_open(*args, **kwargs):
3683             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3684         file_handler.file_open = file_open
3685
3686         opener = compat_urllib_request.build_opener(
3687             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3688
3689         # Delete the default user-agent header, which would otherwise apply in
3690         # cases where our custom HTTP handler doesn't come into play
3691         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3692         opener.addheaders = []
3693         self._opener = opener
3694
3695     def encode(self, s):
3696         if isinstance(s, bytes):
3697             return s  # Already encoded
3698
3699         try:
3700             return s.encode(self.get_encoding())
3701         except UnicodeEncodeError as err:
3702             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3703             raise
3704
3705     def get_encoding(self):
3706         encoding = self.params.get('encoding')
3707         if encoding is None:
3708             encoding = preferredencoding()
3709         return encoding
3710
3711     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3712         ''' Write infojson and returns True = written, False = skip, None = error '''
3713         if overwrite is None:
3714             overwrite = self.params.get('overwrites', True)
3715         if not self.params.get('writeinfojson'):
3716             return False
3717         elif not infofn:
3718             self.write_debug(f'Skipping writing {label} infojson')
3719             return False
3720         elif not self._ensure_dir_exists(infofn):
3721             return None
3722         elif not overwrite and os.path.exists(infofn):
3723             self.to_screen(f'[info] {label.title()} metadata is already present')
3724         else:
3725             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3726             try:
3727                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3728             except (OSError, IOError):
3729                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3730                 return None
3731         return True
3732
3733     def _write_description(self, label, ie_result, descfn):
3734         ''' Write description and returns True = written, False = skip, None = error '''
3735         if not self.params.get('writedescription'):
3736             return False
3737         elif not descfn:
3738             self.write_debug(f'Skipping writing {label} description')
3739             return False
3740         elif not self._ensure_dir_exists(descfn):
3741             return None
3742         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3743             self.to_screen(f'[info] {label.title()} description is already present')
3744         elif ie_result.get('description') is None:
3745             self.report_warning(f'There\'s no {label} description to write')
3746             return False
3747         else:
3748             try:
3749                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3750                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3751                     descfile.write(ie_result['description'])
3752             except (OSError, IOError):
3753                 self.report_error(f'Cannot write {label} description file {descfn}')
3754                 return None
3755         return True
3756
3757     def _write_subtitles(self, info_dict, filename):
3758         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3759         ret = []
3760         subtitles = info_dict.get('requested_subtitles')
3761         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3762             # subtitles download errors are already managed as troubles in relevant IE
3763             # that way it will silently go on when used with unsupporting IE
3764             return ret
3765
3766         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3767         if not sub_filename_base:
3768             self.to_screen('[info] Skipping writing video subtitles')
3769             return ret
3770         for sub_lang, sub_info in subtitles.items():
3771             sub_format = sub_info['ext']
3772             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3773             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3774             existing_sub = self.existing_file((sub_filename_final, sub_filename))
3775             if existing_sub:
3776                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3777                 sub_info['filepath'] = existing_sub
3778                 ret.append((existing_sub, sub_filename_final))
3779                 continue
3780
3781             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3782             if sub_info.get('data') is not None:
3783                 try:
3784                     # Use newline='' to prevent conversion of newline characters
3785                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3786                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3787                         subfile.write(sub_info['data'])
3788                     sub_info['filepath'] = sub_filename
3789                     ret.append((sub_filename, sub_filename_final))
3790                     continue
3791                 except (OSError, IOError):
3792                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3793                     return None
3794
3795             try:
3796                 sub_copy = sub_info.copy()
3797                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3798                 self.dl(sub_filename, sub_copy, subtitle=True)
3799                 sub_info['filepath'] = sub_filename
3800                 ret.append((sub_filename, sub_filename_final))
3801             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3802                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3803                     raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
3804                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3805         return ret
3806
3807     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3808         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3809         write_all = self.params.get('write_all_thumbnails', False)
3810         thumbnails, ret = [], []
3811         if write_all or self.params.get('writethumbnail', False):
3812             thumbnails = info_dict.get('thumbnails') or []
3813         multiple = write_all and len(thumbnails) > 1
3814
3815         if thumb_filename_base is None:
3816             thumb_filename_base = filename
3817         if thumbnails and not thumb_filename_base:
3818             self.write_debug(f'Skipping writing {label} thumbnail')
3819             return ret
3820
3821         for idx, t in list(enumerate(thumbnails))[::-1]:
3822             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3823             thumb_display_id = f'{label} thumbnail {t["id"]}'
3824             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3825             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3826
3827             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3828             if existing_thumb:
3829                 self.to_screen('[info] %s is already present' % (
3830                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3831                 t['filepath'] = existing_thumb
3832                 ret.append((existing_thumb, thumb_filename_final))
3833             else:
3834                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3835                 try:
3836                     uf = self.urlopen(t['url'])
3837                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3838                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3839                         shutil.copyfileobj(uf, thumbf)
3840                     ret.append((thumb_filename, thumb_filename_final))
3841                     t['filepath'] = thumb_filename
3842                 except network_exceptions as err:
3843                     thumbnails.pop(idx)
3844                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3845             if ret and not write_all:
3846                 break
3847         return ret