yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_pycrypto_AES,
  40     compat_shlex_quote,
  41     compat_str,
  42     compat_tokenize_tokenize,
  43     compat_urllib_error,
  44     compat_urllib_request,
  45     compat_urllib_request_DataHandler,
  46     windows_enable_vt_mode,
  47 )
  48 from .cookies import load_cookies
  49 from .utils import (
  50     age_restricted,
  51     args_to_str,
  52     ContentTooShortError,
  53     date_from_str,
  54     DateRange,
  55     DEFAULT_OUTTMPL,
  56     determine_ext,
  57     determine_protocol,
  58     DownloadCancelled,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     format_decimal_suffix,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     get_domain,
  74     HEADRequest,
  75     int_or_none,
  76     iri_to_uri,
  77     ISO3166Utils,
  78     join_nonempty,
  79     LazyList,
  80     LINK_TEMPLATES,
  81     locked_file,
  82     make_dir,
  83     make_HTTPS_handler,
  84     MaxDownloadsReached,
  85     network_exceptions,
  86     number_of_digits,
  87     orderedSet,
  88     OUTTMPL_TYPES,
  89     PagedList,
  90     parse_filesize,
  91     PerRequestProxyHandler,
  92     platform_name,
  93     Popen,
  94     POSTPROCESS_WHEN,
  95     PostProcessingError,
  96     preferredencoding,
  97     prepend_extension,
  98     ReExtractInfo,
  99     register_socks_protocols,
 100     RejectedVideoReached,
 101     remove_terminal_sequences,
 102     render_table,
 103     replace_extension,
 104     SameFileError,
 105     sanitize_filename,
 106     sanitize_path,
 107     sanitize_url,
 108     sanitized_Request,
 109     std_headers,
 110     STR_FORMAT_RE_TMPL,
 111     STR_FORMAT_TYPES,
 112     str_or_none,
 113     strftime_or_none,
 114     subtitles_filename,
 115     supports_terminal_sequences,
 116     timetuple_from_msec,
 117     to_high_limit_path,
 118     traverse_obj,
 119     try_get,
 120     UnavailableVideoError,
 121     url_basename,
 122     variadic,
 123     version_tuple,
 124     write_json_file,
 125     write_string,
 126     YoutubeDLCookieProcessor,
 127     YoutubeDLHandler,
 128     YoutubeDLRedirectHandler,
 129 )
 130 from .cache import Cache
 131 from .minicurses import format_text
 132 from .extractor import (
 133     gen_extractor_classes,
 134     get_info_extractor,
 135     _LAZY_LOADER,
 136     _PLUGIN_CLASSES as plugin_extractors
 137 )
 138 from .extractor.openload import PhantomJSwrapper
 139 from .downloader import (
 140     FFmpegFD,
 141     get_suitable_downloader,
 142     shorten_protocol_name
 143 )
 144 from .downloader.rtmp import rtmpdump_version
 145 from .postprocessor import (
 146     get_postprocessor,
 147     EmbedThumbnailPP,
 148     FFmpegFixupDuplicateMoovPP,
 149     FFmpegFixupDurationPP,
 150     FFmpegFixupM3u8PP,
 151     FFmpegFixupM4aPP,
 152     FFmpegFixupStretchedPP,
 153     FFmpegFixupTimestampPP,
 154     FFmpegMergerPP,
 155     FFmpegPostProcessor,
 156     MoveFilesAfterDownloadPP,
 157     _PLUGIN_CLASSES as plugin_postprocessors
 158 )
 159 from .update import detect_variant
 160 from .version import __version__, RELEASE_GIT_HEAD
 161
 162 if compat_os_name == 'nt':
 163     import ctypes
 164
 165
 166 class YoutubeDL(object):
 167     """YoutubeDL class.
 168
  169     YoutubeDL objects are the ones responsible for downloading the
 170     actual video file and writing it to disk if the user has requested
 171     it, among some other tasks. In most cases there should be one per
 172     program. As, given a video URL, the downloader doesn't know how to
 173     extract all the needed information, task that InfoExtractors do, it
 174     has to pass the URL to one of them.
 175
 176     For this, YoutubeDL objects have a method that allows
 177     InfoExtractors to be registered in a given order. When it is passed
  178     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 179     finds that reports being able to handle it. The InfoExtractor extracts
 180     all the information about the video or videos the URL refers to, and
  181     YoutubeDL processes the extracted information, possibly using a File
 182     Downloader to download the video.
 183
 184     YoutubeDL objects accept a lot of parameters. In order not to saturate
 185     the object constructor with arguments, it receives a dictionary of
 186     options instead. These options are available through the params
 187     attribute for the InfoExtractors to use. The YoutubeDL also
 188     registers itself as the downloader in charge for the InfoExtractors
 189     that are added to it, so this is a "mutual registration".
 190
 191     Available options:
 192
 193     username:          Username for authentication purposes.
 194     password:          Password for authentication purposes.
 195     videopassword:     Password for accessing a video.
 196     ap_mso:            Adobe Pass multiple-system operator identifier.
 197     ap_username:       Multiple-system operator account username.
 198     ap_password:       Multiple-system operator account password.
 199     usenetrc:          Use netrc for authentication instead.
 200     verbose:           Print additional info to stdout.
 201     quiet:             Do not print messages to stdout.
 202     no_warnings:       Do not print out anything for warnings.
 203     forceprint:        A dict with keys video/playlist mapped to
 204                        a list of templates to force print to stdout
 205                        For compatibility, a single list is also accepted
 206     forceurl:          Force printing final URL. (Deprecated)
 207     forcetitle:        Force printing title. (Deprecated)
 208     forceid:           Force printing ID. (Deprecated)
 209     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 210     forcedescription:  Force printing description. (Deprecated)
 211     forcefilename:     Force printing final filename. (Deprecated)
 212     forceduration:     Force printing duration. (Deprecated)
 213     forcejson:         Force printing info_dict as JSON.
 214     dump_single_json:  Force printing the info_dict of the whole playlist
 215                        (or video) as a single JSON line.
 216     force_write_download_archive: Force writing download archive regardless
 217                        of 'skip_download' or 'simulate'.
 218     simulate:          Do not download the video files. If unset (or None),
 219                        simulate only if listsubtitles, listformats or list_thumbnails is used
  220     format:            Video format code. See "FORMAT SELECTION" for more details.
 221                        You can also pass a function. The function takes 'ctx' as
 222                        argument and returns the formats to download.
 223                        See "build_format_selector" for an implementation
 224     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
  225     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 226                        extracting metadata even if the video is not actually
 227                        available for download (experimental)
 228     format_sort:       A list of fields by which to sort the video formats.
 229                        See "Sorting Formats" for more details.
 230     format_sort_force: Force the given format_sort. see "Sorting Formats"
 231                        for more details.
 232     allow_multiple_video_streams:   Allow multiple video streams to be merged
 233                        into a single file
 234     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 235                        into a single file
  236     check_formats:     Whether to test if the formats are downloadable.
 237                        Can be True (check all), False (check none),
 238                        'selected' (check selected formats),
 239                        or None (check only if requested by extractor)
  240     paths:             Dictionary of output paths. The allowed keys are 'home',
 241                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 242     outtmpl:           Dictionary of templates for output names. Allowed keys
 243                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 244                        For compatibility with youtube-dl, a single string can also be used
 245     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 246     restrictfilenames: Do not allow "&" and spaces in file names
 247     trim_file_name:    Limit length of filename (extension excluded)
 248     windowsfilenames:  Force the filenames to be windows compatible
 249     ignoreerrors:      Do not stop on download/postprocessing errors.
 250                        Can be 'only_download' to ignore only download errors.
 251                        Default is 'only_download' for CLI, but False for API
 252     skip_playlist_after_errors: Number of allowed failures until the rest of
 253                        the playlist is skipped
 254     force_generic_extractor: Force downloader to use the generic extractor
 255     overwrites:        Overwrite all video and metadata files if True,
 256                        overwrite only non-video files if None
 257                        and don't overwrite any file if False
 258                        For compatibility with youtube-dl,
 259                        "nooverwrites" may also be used instead
 260     playliststart:     Playlist item to start at.
 261     playlistend:       Playlist item to end at.
 262     playlist_items:    Specific indices of playlist to download.
 263     playlistreverse:   Download playlist items in reverse order.
 264     playlistrandom:    Download playlist items in random order.
 265     matchtitle:        Download only matching titles.
 266     rejecttitle:       Reject downloads for matching titles.
 267     logger:            Log messages to a logging.Logger instance.
 268     logtostderr:       Log messages to stderr instead of stdout.
 269     consoletitle:       Display progress in console window's titlebar.
 270     writedescription:  Write the video description to a .description file
  271     writeinfojson:     Write the video metadata to a .info.json file
 272     clean_infojson:    Remove private fields from the infojson
 273     getcomments:       Extract video comments. This will not be written to disk
 274                        unless writeinfojson is also given
 275     writeannotations:  Write the video annotations to a .annotations.xml file
 276     writethumbnail:    Write the thumbnail image to a file
 277     allow_playlist_files: Whether to write playlists' description, infojson etc
 278                        also to disk when using the 'write*' options
 279     write_all_thumbnails:  Write all thumbnail formats to files
 280     writelink:         Write an internet shortcut file, depending on the
 281                        current platform (.url/.webloc/.desktop)
 282     writeurllink:      Write a Windows internet shortcut file (.url)
 283     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 284     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 285     writesubtitles:    Write the video subtitles to a file
 286     writeautomaticsub: Write the automatically generated subtitles to a file
 287     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 288                        Downloads all the subtitles of the video
 289                        (requires writesubtitles or writeautomaticsub)
 290     listsubtitles:     Lists all available subtitles for the video
 291     subtitlesformat:   The format code for subtitles
 292     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 293                        The list may contain "all" to refer to all the available
 294                        subtitles. The language can be prefixed with a "-" to
 295                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 296     keepvideo:         Keep the video file after post-processing
 297     daterange:         A DateRange object, download only if the upload_date is in the range.
 298     skip_download:     Skip the actual download of the video file
 299     cachedir:          Location of the cache files in the filesystem.
 300                        False to disable filesystem cache.
 301     noplaylist:        Download single video instead of a playlist if in doubt.
 302     age_limit:         An integer representing the user's age in years.
 303                        Unsuitable videos for the given age are skipped.
 304     min_views:         An integer representing the minimum view count the video
 305                        must have in order to not be skipped.
 306                        Videos without view count information are always
 307                        downloaded. None for no limit.
 308     max_views:         An integer representing the maximum view count.
 309                        Videos that are more popular than that are not
 310                        downloaded.
 311                        Videos without view count information are always
 312                        downloaded. None for no limit.
 313     download_archive:  File name of a file where all downloads are recorded.
 314                        Videos already present in the file are not downloaded
 315                        again.
 316     break_on_existing: Stop the download process after attempting to download a
 317                        file that is in the archive.
 318     break_on_reject:   Stop the download process when encountering a video that
 319                        has been filtered out.
 320     break_per_url:     Whether break_on_reject and break_on_existing
 321                        should act on each input URL as opposed to for the entire queue
 322     cookiefile:        File name where cookies should be read from and dumped to
 323     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
  324                        name/path from where cookies are loaded, and the name of the
 325                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 326     nocheckcertificate:  Do not verify SSL certificates
 327     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 328                        At the moment, this is only supported by YouTube.
 329     proxy:             URL of the proxy server to use
 330     geo_verification_proxy:  URL of the proxy to use for IP address verification
 331                        on geo-restricted sites.
 332     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 333     bidi_workaround:   Work around buggy terminals without bidirectional text
  334                        support, using fribidi
 335     debug_printtraffic:Print out sent and received HTTP traffic
 336     include_ads:       Download ads as well (deprecated)
 337     default_search:    Prepend this string if an input url is not valid.
 338                        'auto' for elaborate guessing
 339     encoding:          Use this encoding instead of the system-specified.
 340     extract_flat:      Do not resolve URLs, return the immediate result.
 341                        Pass in 'in_playlist' to only show this behavior for
 342                        playlist items.
 343     wait_for_video:    If given, wait for scheduled streams to become available.
 344                        The value should be a tuple containing the range
 345                        (min_secs, max_secs) to wait between retries
 346     postprocessors:    A list of dictionaries, each with an entry
 347                        * key:  The name of the postprocessor. See
 348                                yt_dlp/postprocessor/__init__.py for a list.
 349                        * when: When to run the postprocessor. Can be one of
 350                                pre_process|before_dl|post_process|after_move.
 351                                Assumed to be 'post_process' if not given
 352     post_hooks:        Deprecated - Register a custom postprocessor instead
 353                        A list of functions that get called as the final step
 354                        for each video file, after all postprocessors have been
 355                        called. The filename will be passed as the only argument.
 356     progress_hooks:    A list of functions that get called on download
 357                        progress, with a dictionary with the entries
 358                        * status: One of "downloading", "error", or "finished".
 359                                  Check this first and ignore unknown values.
 360                        * info_dict: The extracted info_dict
 361
 362                        If status is one of "downloading", or "finished", the
 363                        following properties may also be present:
 364                        * filename: The final filename (always present)
 365                        * tmpfilename: The filename we're currently writing to
 366                        * downloaded_bytes: Bytes on disk
 367                        * total_bytes: Size of the whole file, None if unknown
 368                        * total_bytes_estimate: Guess of the eventual file size,
 369                                                None if unavailable.
 370                        * elapsed: The number of seconds since download started.
 371                        * eta: The estimated time in seconds, None if unknown
 372                        * speed: The download speed in bytes/second, None if
 373                                 unknown
 374                        * fragment_index: The counter of the currently
 375                                          downloaded video fragment.
 376                        * fragment_count: The number of fragments (= individual
 377                                          files that will be merged)
 378
 379                        Progress hooks are guaranteed to be called at least once
 380                        (with status "finished") if the download is successful.
 381     postprocessor_hooks:  A list of functions that get called on postprocessing
 382                        progress, with a dictionary with the entries
 383                        * status: One of "started", "processing", or "finished".
 384                                  Check this first and ignore unknown values.
 385                        * postprocessor: Name of the postprocessor
 386                        * info_dict: The extracted info_dict
 387
  388                        Postprocessor hooks are guaranteed to be called at least twice
 389                        (with status "started" and "finished") if the processing is successful.
 390     merge_output_format: Extension to use when merging formats.
 391     final_ext:         Expected final extension; used to detect when the file was
 392                        already downloaded and converted
 393     fixup:             Automatically correct known faults of the file.
 394                        One of:
 395                        - "never": do nothing
 396                        - "warn": only emit a warning
 397                        - "detect_or_warn": check whether we can do anything
 398                                            about it, warn otherwise (default)
 399     source_address:    Client-side IP address to bind to.
 400     call_home:         Boolean, true iff we are allowed to contact the
 401                        yt-dlp servers for debugging. (BROKEN)
 402     sleep_interval_requests: Number of seconds to sleep between requests
 403                        during extraction
 404     sleep_interval:    Number of seconds to sleep before each download when
 405                        used alone or a lower bound of a range for randomized
 406                        sleep before each download (minimum possible number
 407                        of seconds to sleep) when used along with
 408                        max_sleep_interval.
 409     max_sleep_interval:Upper bound of a range for randomized sleep before each
 410                        download (maximum possible number of seconds to sleep).
 411                        Must only be used along with sleep_interval.
 412                        Actual sleep time will be a random float from range
 413                        [sleep_interval; max_sleep_interval].
 414     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 415     listformats:       Print an overview of available video formats and exit.
 416     list_thumbnails:   Print a table of all thumbnails and exit.
 417     match_filter:      A function that gets called with the info_dict of
 418                        every video.
 419                        If it returns a message, the video is ignored.
 420                        If it returns None, the video is downloaded.
 421                        match_filter_func in utils.py is one example for this.
 422     no_color:          Do not emit color codes in output.
 423     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 424                        HTTP header
 425     geo_bypass_country:
  426                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 427                        explicit geographic restriction bypassing via faking
 428                        X-Forwarded-For HTTP header
 429     geo_bypass_ip_block:
 430                        IP range in CIDR notation that will be used similarly to
 431                        geo_bypass_country
 432
 433     The following options determine which downloader is picked:
 434     external_downloader: A dictionary of protocol keys and the executable of the
 435                        external downloader to use for it. The allowed protocols
 436                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 437                        Set the value to 'native' to use the native downloader
 438     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 439                        or {'m3u8': 'ffmpeg'} instead.
 440                        Use the native HLS downloader instead of ffmpeg/avconv
 441                        if True, otherwise use ffmpeg/avconv if False, otherwise
 442                        use downloader suggested by extractor if None.
 443     compat_opts:       Compatibility options. See "Differences in default behavior".
 444                        The following options do not work when used through the API:
  445                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
  446                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
  447                        Refer to __init__.py for their implementation
 448     progress_template: Dictionary of templates for progress outputs.
 449                        Allowed keys are 'download', 'postprocess',
 450                        'download-title' (console title) and 'postprocess-title'.
 451                        The template is mapped on a dictionary with keys 'progress' and 'info'
 452
 453     The following parameters are not used by YoutubeDL itself, they are used by
 454     the downloader (see yt_dlp/downloader/common.py):
 455     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 456     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 457     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 458     external_downloader_args, concurrent_fragment_downloads.
 459
 460     The following options are used by the post processors:
 461     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 462                        otherwise prefer ffmpeg. (avconv support is deprecated)
 463     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 464                        to the binary or its containing directory.
 465     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 466                        and a list of additional command-line arguments for the
 467                        postprocessor/executable. The dict can also have "PP+EXE" keys
 468                        which are used when the given exe is used by the given PP.
  469                        Use 'default' as the name for arguments to be passed to all PPs
 470                        For compatibility with youtube-dl, a single list of args
 471                        can also be used
 472
 473     The following options are used by the extractors:
 474     extractor_retries: Number of times to retry for known errors
 475     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 476     hls_split_discontinuity: Split HLS playlists to different formats at
 477                        discontinuities such as ad breaks (default: False)
 478     extractor_args:    A dictionary of arguments to be passed to the extractors.
 479                        See "EXTRACTOR ARGUMENTS" for details.
 480                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 481     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 482                        If True (default), DASH manifests and related
 483                        data will be downloaded and processed by extractor.
 484                        You can reduce network I/O by disabling it if you don't
 485                        care about DASH. (only for youtube)
 486     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 487                        If True (default), HLS manifests and related
 488                        data will be downloaded and processed by extractor.
 489                        You can reduce network I/O by disabling it if you don't
 490                        care about HLS. (only for youtube)
 491     """
 492
 493     _NUMERIC_FIELDS = set((
 494         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 495         'timestamp', 'release_timestamp',
 496         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 497         'average_rating', 'comment_count', 'age_limit',
 498         'start_time', 'end_time',
 499         'chapter_number', 'season_number', 'episode_number',
 500         'track_number', 'disc_number', 'release_year',
 501     ))
 502
 503     _format_selection_exts = {
 504         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 505         'video': {'mp4', 'flv', 'webm', '3gp'},
 506         'storyboards': {'mhtml'},
 507     }
 508
 509     params = None
 510     _ies = {}
 511     _pps = {k: [] for k in POSTPROCESS_WHEN}
 512     _printed_messages = set()
 513     _first_webpage_request = True
 514     _download_retcode = None
 515     _num_downloads = None
 516     _playlist_level = 0
 517     _playlist_urls = set()
 518     _screen_file = None
 519
 520     def __init__(self, params=None, auto_init=True):
  521         """Create a YoutubeDL object with the given options.
 522         @param auto_init    Whether to load the default extractors and print header (if verbose).
 523                             Set to 'no_verbose_header' to not print the header
 524         """
 525         if params is None:
 526             params = {}
 527         self._ies = {}
 528         self._ies_instances = {}
 529         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 530         self._printed_messages = set()
 531         self._first_webpage_request = True
 532         self._post_hooks = []
 533         self._progress_hooks = []
 534         self._postprocessor_hooks = []
 535         self._download_retcode = 0
 536         self._num_downloads = 0
 537         self._num_videos = 0
 538         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 539         self._err_file = sys.stderr
 540         self.params = params
 541         self.cache = Cache(self)
 542
 543         windows_enable_vt_mode()
 544         self._allow_colors = {
 545             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 546             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 547         }
 548
 549         if sys.version_info < (3, 6):
 550             self.report_warning(
 551                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 552
 553         if self.params.get('allow_unplayable_formats'):
 554             self.report_warning(
 555                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 556                 'This is a developer option intended for debugging. \n'
 557                 '         If you experience any issues while using this option, '
 558                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 559
 560         def check_deprecated(param, option, suggestion):
 561             if self.params.get(param) is not None:
 562                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 563                 return True
 564             return False
 565
 566         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 567             if self.params.get('geo_verification_proxy') is None:
 568                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 569
 570         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 571         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 572         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 573
 574         for msg in self.params.get('_warnings', []):
 575             self.report_warning(msg)
 576         for msg in self.params.get('_deprecation_warnings', []):
 577             self.deprecation_warning(msg)
 578
 579         if 'list-formats' in self.params.get('compat_opts', []):
 580             self.params['listformats_table'] = False
 581
 582         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 583             # nooverwrites was unnecessarily changed to overwrites
 584             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 585             # This ensures compatibility with both keys
 586             self.params['overwrites'] = not self.params['nooverwrites']
 587         elif self.params.get('overwrites') is None:
 588             self.params.pop('overwrites', None)
 589         else:
 590             self.params['nooverwrites'] = not self.params['overwrites']
 591
 592         # Compatibility with older syntax
 593         params.setdefault('forceprint', {})
 594         if not isinstance(params['forceprint'], dict):
 595             params['forceprint'] = {'video': params['forceprint']}
 596
 597         if params.get('bidi_workaround', False):
 598             try:
 599                 import pty
 600                 master, slave = pty.openpty()
 601                 width = compat_get_terminal_size().columns
 602                 if width is None:
 603                     width_args = []
 604                 else:
 605                     width_args = ['-w', str(width)]
 606                 sp_kwargs = dict(
 607                     stdin=subprocess.PIPE,
 608                     stdout=slave,
 609                     stderr=self._err_file)
 610                 try:
 611                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 612                 except OSError:
 613                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 614                 self._output_channel = os.fdopen(master, 'rb')
 615             except OSError as ose:
 616                 if ose.errno == errno.ENOENT:
 617                     self.report_warning(
 618                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 619                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 620                 else:
 621                     raise
 622
 623         if (sys.platform != 'win32'
 624                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 625                 and not params.get('restrictfilenames', False)):
 626             # Unicode filesystem API will throw errors (#1474, #13027)
 627             self.report_warning(
 628                 'Assuming --restrict-filenames since file system encoding '
 629                 'cannot encode all characters. '
 630                 'Set the LC_ALL environment variable to fix this.')
 631             self.params['restrictfilenames'] = True
 632
 633         self.outtmpl_dict = self.parse_outtmpl()
 634
 635         # Creating format selector here allows us to catch syntax errors before the extraction
 636         self.format_selector = (
 637             self.params.get('format') if self.params.get('format') in (None, '-')
 638             else self.params['format'] if callable(self.params['format'])
 639             else self.build_format_selector(self.params['format']))
 640
 641         self._setup_opener()
 642
 643         if auto_init:
 644             if auto_init != 'no_verbose_header':
 645                 self.print_debug_header()
 646             self.add_default_info_extractors()
 647
 648         hooks = {
 649             'post_hooks': self.add_post_hook,
 650             'progress_hooks': self.add_progress_hook,
 651             'postprocessor_hooks': self.add_postprocessor_hook,
 652         }
 653         for opt, fn in hooks.items():
 654             for ph in self.params.get(opt, []):
 655                 fn(ph)
 656
 657         for pp_def_raw in self.params.get('postprocessors', []):
 658             pp_def = dict(pp_def_raw)
 659             when = pp_def.pop('when', 'post_process')
 660             self.add_post_processor(
 661                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 662                 when=when)
 663
 664         register_socks_protocols()
 665
 666         def preload_download_archive(fn):
 667             """Preload the archive, if any is specified"""
 668             if fn is None:
 669                 return False
 670             self.write_debug(f'Loading archive file {fn!r}')
 671             try:
 672                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 673                     for line in archive_file:
 674                         self.archive.add(line.strip())
 675             except IOError as ioe:
 676                 if ioe.errno != errno.ENOENT:
 677                     raise
 678                 return False
 679             return True
 680
 681         self.archive = set()
 682         preload_download_archive(self.params.get('download_archive'))
 683
 684     def warn_if_short_id(self, argv):
 685         # short YouTube ID starting with dash?
 686         idxs = [
 687             i for i, a in enumerate(argv)
 688             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 689         if idxs:
 690             correct_argv = (
 691                 ['yt-dlp']
 692                 + [a for i, a in enumerate(argv) if i not in idxs]
 693                 + ['--'] + [argv[i] for i in idxs]
 694             )
 695             self.report_warning(
 696                 'Long argument string detected. '
 697                 'Use -- to separate parameters and URLs, like this:\n%s' %
 698                 args_to_str(correct_argv))
 699
 700     def add_info_extractor(self, ie):
 701         """Add an InfoExtractor object to the end of the list."""
 702         ie_key = ie.ie_key()
 703         self._ies[ie_key] = ie
 704         if not isinstance(ie, type):
 705             self._ies_instances[ie_key] = ie
 706             ie.set_downloader(self)
 707
 708     def _get_info_extractor_class(self, ie_key):
 709         ie = self._ies.get(ie_key)
 710         if ie is None:
 711             ie = get_info_extractor(ie_key)
 712             self.add_info_extractor(ie)
 713         return ie
 714
 715     def get_info_extractor(self, ie_key):
 716         """
 717         Get an instance of an IE with name ie_key, it will try to get one from
 718         the _ies list, if there's no instance it will create a new one and add
 719         it to the extractor list.
 720         """
 721         ie = self._ies_instances.get(ie_key)
 722         if ie is None:
 723             ie = get_info_extractor(ie_key)()
 724             self.add_info_extractor(ie)
 725         return ie
 726
 727     def add_default_info_extractors(self):
 728         """
 729         Add the InfoExtractors returned by gen_extractors to the end of the list
 730         """
 731         for ie in gen_extractor_classes():
 732             self.add_info_extractor(ie)
 733
 734     def add_post_processor(self, pp, when='post_process'):
 735         """Add a PostProcessor object to the end of the chain."""
 736         self._pps[when].append(pp)
 737         pp.set_downloader(self)
 738
 739     def add_post_hook(self, ph):
 740         """Add the post hook"""
 741         self._post_hooks.append(ph)
 742
 743     def add_progress_hook(self, ph):
 744         """Add the download progress hook"""
 745         self._progress_hooks.append(ph)
 746
 747     def add_postprocessor_hook(self, ph):
 748         """Add the postprocessing progress hook"""
 749         self._postprocessor_hooks.append(ph)
 750         for pps in self._pps.values():
 751             for pp in pps:
 752                 pp.add_progress_hook(ph)
 753
 754     def _bidi_workaround(self, message):
 755         if not hasattr(self, '_output_channel'):
 756             return message
 757
 758         assert hasattr(self, '_output_process')
 759         assert isinstance(message, compat_str)
 760         line_count = message.count('\n') + 1
 761         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 762         self._output_process.stdin.flush()
 763         res = ''.join(self._output_channel.readline().decode('utf-8')
 764                       for _ in range(line_count))
 765         return res[:-len('\n')]
 766
 767     def _write_string(self, message, out=None, only_once=False):
 768         if only_once:
 769             if message in self._printed_messages:
 770                 return
 771             self._printed_messages.add(message)
 772         write_string(message, out=out, encoding=self.params.get('encoding'))
 773
 774     def to_stdout(self, message, skip_eol=False, quiet=False):
 775         """Print message to stdout"""
 776         if self.params.get('logger'):
 777             self.params['logger'].debug(message)
 778         elif not quiet or self.params.get('verbose'):
 779             self._write_string(
 780                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 781                 self._err_file if quiet else self._screen_file)
 782
 783     def to_stderr(self, message, only_once=False):
 784         """Print message to stderr"""
 785         assert isinstance(message, compat_str)
 786         if self.params.get('logger'):
 787             self.params['logger'].error(message)
 788         else:
 789             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 790
 791     def to_console_title(self, message):
 792         if not self.params.get('consoletitle', False):
 793             return
 794         message = remove_terminal_sequences(message)
 795         if compat_os_name == 'nt':
 796             if ctypes.windll.kernel32.GetConsoleWindow():
 797                 # c_wchar_p() might not be necessary if `message` is
 798                 # already of type unicode()
 799                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 800         elif 'TERM' in os.environ:
 801             self._write_string('\033]0;%s\007' % message, self._screen_file)
 802
 803     def save_console_title(self):
 804         if not self.params.get('consoletitle', False):
 805             return
 806         if self.params.get('simulate'):
 807             return
 808         if compat_os_name != 'nt' and 'TERM' in os.environ:
 809             # Save the title on stack
 810             self._write_string('\033[22;0t', self._screen_file)
 811
 812     def restore_console_title(self):
 813         if not self.params.get('consoletitle', False):
 814             return
 815         if self.params.get('simulate'):
 816             return
 817         if compat_os_name != 'nt' and 'TERM' in os.environ:
 818             # Restore the title from stack
 819             self._write_string('\033[23;0t', self._screen_file)
 820
 821     def __enter__(self):
 822         self.save_console_title()
 823         return self
 824
 825     def __exit__(self, *args):
 826         self.restore_console_title()
 827
 828         if self.params.get('cookiefile') is not None:
 829             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 830
 831     def trouble(self, message=None, tb=None, is_error=True):
 832         """Determine action to take when a download problem appears.
 833
 834         Depending on if the downloader has been configured to ignore
 835         download errors or not, this method may throw an exception or
 836         not when errors are found, after printing the message.
 837
 838         @param tb          If given, is additional traceback information
 839         @param is_error    Whether to raise error according to ignorerrors
 840         """
 841         if message is not None:
 842             self.to_stderr(message)
 843         if self.params.get('verbose'):
 844             if tb is None:
 845                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 846                     tb = ''
 847                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 848                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 849                     tb += encode_compat_str(traceback.format_exc())
 850                 else:
 851                     tb_data = traceback.format_list(traceback.extract_stack())
 852                     tb = ''.join(tb_data)
 853             if tb:
 854                 self.to_stderr(tb)
 855         if not is_error:
 856             return
 857         if not self.params.get('ignoreerrors'):
 858             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 859                 exc_info = sys.exc_info()[1].exc_info
 860             else:
 861                 exc_info = sys.exc_info()
 862             raise DownloadError(message, exc_info)
 863         self._download_retcode = 1
 864
 865     def to_screen(self, message, skip_eol=False):
 866         """Print message to stdout if not in quiet mode"""
 867         self.to_stdout(
 868             message, skip_eol, quiet=self.params.get('quiet', False))
 869
    class Styles(Enum):
        """Named text styles. _format_text passes a member's value on to format_text."""
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        # Same value as HEADERS, so Enum makes WARNING an alias of the HEADERS member
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 878
 879     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 880         if test_encoding:
 881             original_text = text
 882             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 883             text = text.encode(encoding, 'ignore').decode(encoding)
 884             if fallback is not None and text != original_text:
 885                 text = fallback
 886         if isinstance(f, self.Styles):
 887             f = f.value
 888         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 889
 890     def _format_screen(self, *args, **kwargs):
 891         return self._format_text(
 892             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 893
 894     def _format_err(self, *args, **kwargs):
 895         return self._format_text(
 896             self._err_file, self._allow_colors['err'], *args, **kwargs)
 897
 898     def report_warning(self, message, only_once=False):
 899         '''
 900         Print the message to stderr, it will be prefixed with 'WARNING:'
 901         If stderr is a tty file the 'WARNING:' will be colored
 902         '''
 903         if self.params.get('logger') is not None:
 904             self.params['logger'].warning(message)
 905         else:
 906             if self.params.get('no_warnings'):
 907                 return
 908             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 909
 910     def deprecation_warning(self, message):
 911         if self.params.get('logger') is not None:
 912             self.params['logger'].warning('DeprecationWarning: {message}')
 913         else:
 914             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 915
 916     def report_error(self, message, *args, **kwargs):
 917         '''
 918         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 919         in red if stderr is a tty file.
 920         '''
 921         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 922
 923     def write_debug(self, message, only_once=False):
 924         '''Log debug message or Print message to stderr'''
 925         if not self.params.get('verbose', False):
 926             return
 927         message = '[debug] %s' % message
 928         if self.params.get('logger'):
 929             self.params['logger'].debug(message)
 930         else:
 931             self.to_stderr(message, only_once)
 932
 933     def report_file_already_downloaded(self, file_name):
 934         """Report file has already been fully downloaded."""
 935         try:
 936             self.to_screen('[download] %s has already been downloaded' % file_name)
 937         except UnicodeEncodeError:
 938             self.to_screen('[download] The file has already been downloaded')
 939
 940     def report_file_delete(self, file_name):
 941         """Report that existing file will be deleted."""
 942         try:
 943             self.to_screen('Deleting existing file %s' % file_name)
 944         except UnicodeEncodeError:
 945             self.to_screen('Deleting existing file')
 946
 947     def raise_no_formats(self, info, forced=False):
 948         has_drm = info.get('__has_drm')
 949         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 950         expected = self.params.get('ignore_no_formats_error')
 951         if forced or not expected:
 952             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 953                                  expected=has_drm or expected)
 954         else:
 955             self.report_warning(msg)
 956
 957     def parse_outtmpl(self):
 958         outtmpl_dict = self.params.get('outtmpl', {})
 959         if not isinstance(outtmpl_dict, dict):
 960             outtmpl_dict = {'default': outtmpl_dict}
 961         # Remove spaces in the default template
 962         if self.params.get('restrictfilenames'):
 963             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 964         else:
 965             sanitize = lambda x: x
 966         outtmpl_dict.update({
 967             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 968             if outtmpl_dict.get(k) is None})
 969         for key, val in outtmpl_dict.items():
 970             if isinstance(val, bytes):
 971                 self.report_warning(
 972                     'Parameter outtmpl is bytes, but should be a unicode string. '
 973                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 974         return outtmpl_dict
 975
 976     def get_output_path(self, dir_type='', filename=None):
 977         paths = self.params.get('paths', {})
 978         assert isinstance(paths, dict)
 979         path = os.path.join(
 980             expand_path(paths.get('home', '').strip()),
 981             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 982             filename or '')
 983
 984         # Temporary fix for #4787
 985         # 'Treat' all problem characters by passing filename through preferredencoding
 986         # to workaround encoding issues with subprocess on python2 @ Windows
 987         if sys.version_info < (3, 0) and sys.platform == 'win32':
 988             path = encodeFilename(path, True).decode(preferredencoding())
 989         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 990
 991     @staticmethod
 992     def _outtmpl_expandpath(outtmpl):
 993         # expand_path translates '%%' into '%' and '$$' into '$'
 994         # correspondingly that is not what we want since we need to keep
 995         # '%%' intact for template dict substitution step. Working around
 996         # with boundary-alike separator hack.
 997         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 998         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 999
1000         # outtmpl should be expand_path'ed before template dict substitution
1001         # because meta fields may contain env variables we don't want to
1002         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1003         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1004         return expand_path(outtmpl).replace(sep, '')
1005
1006     @staticmethod
1007     def escape_outtmpl(outtmpl):
1008         ''' Escape any remaining strings like %s, %abc% etc. '''
1009         return re.sub(
1010             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1011             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1012             outtmpl)
1013
1014     @classmethod
1015     def validate_outtmpl(cls, outtmpl):
1016         ''' @return None or Exception object '''
1017         outtmpl = re.sub(
1018             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1019             lambda mobj: f'{mobj.group(0)[:-1]}s',
1020             cls._outtmpl_expandpath(outtmpl))
1021         try:
1022             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1023             return None
1024         except ValueError as err:
1025             return err
1026
1027     @staticmethod
1028     def _copy_infodict(info_dict):
1029         info_dict = dict(info_dict)
1030         for key in ('__original_infodict', '__postprocessors'):
1031             info_dict.pop(key, None)
1032         return info_dict
1033
1034     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1035         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1036         @param sanitize    Whether to sanitize the output as a filename.
1037                            For backward compatibility, a function can also be passed
1038         """
1039
1040         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1041
1042         info_dict = self._copy_infodict(info_dict)
1043         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1044             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1045             if info_dict.get('duration', None) is not None
1046             else None)
1047         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
1048         info_dict['video_autonumber'] = self._num_videos
1049         if info_dict.get('resolution') is None:
1050             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1051
1052         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1053         # of %(field)s to %(field)0Nd for backward compatibility
1054         field_size_compat_map = {
1055             'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1056             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1057             'autonumber': self.params.get('autonumber_size') or 5,
1058         }
1059
1060         TMPL_DICT = {}
1061         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
1062         MATH_FUNCTIONS = {
1063             '+': float.__add__,
1064             '-': float.__sub__,
1065         }
1066         # Field is of the form key1.key2...
1067         # where keys (except first) can be string, int or slice
1068         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
1069         MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
1070         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1071         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1072             (?P<negate>-)?
1073             (?P<fields>{field})
1074             (?P<maths>(?:{math_op}{math_field})*)
1075             (?:>(?P<strf_format>.+?))?
1076             (?P<alternate>(?<!\\),[^|&)]+)?
1077             (?:&(?P<replacement>.*?))?
1078             (?:\|(?P<default>.*?))?
1079             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
1080
1081         def _traverse_infodict(k):
1082             k = k.split('.')
1083             if k[0] == '':
1084                 k.pop(0)
1085             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
1086
1087         def get_value(mdict):
1088             # Object traversal
1089             value = _traverse_infodict(mdict['fields'])
1090             # Negative
1091             if mdict['negate']:
1092                 value = float_or_none(value)
1093                 if value is not None:
1094                     value *= -1
1095             # Do maths
1096             offset_key = mdict['maths']
1097             if offset_key:
1098                 value = float_or_none(value)
1099                 operator = None
1100                 while offset_key:
1101                     item = re.match(
1102                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1103                         offset_key).group(0)
1104                     offset_key = offset_key[len(item):]
1105                     if operator is None:
1106                         operator = MATH_FUNCTIONS[item]
1107                         continue
1108                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1109                     offset = float_or_none(item)
1110                     if offset is None:
1111                         offset = float_or_none(_traverse_infodict(item))
1112                     try:
1113                         value = operator(value, multiplier * offset)
1114                     except (TypeError, ZeroDivisionError):
1115                         return None
1116                     operator = None
1117             # Datetime formatting
1118             if mdict['strf_format']:
1119                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1120
1121             return value
1122
1123         na = self.params.get('outtmpl_na_placeholder', 'NA')
1124
1125         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1126             return sanitize_filename(str(value), restricted=restricted,
1127                                      is_id=re.search(r'(^|[_.])id(\.|$)', key))
1128
1129         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1130         sanitize = bool(sanitize)
1131
1132         def _dumpjson_default(obj):
1133             if isinstance(obj, (set, LazyList)):
1134                 return list(obj)
1135             return repr(obj)
1136
1137         def create_key(outer_mobj):
1138             if not outer_mobj.group('has_key'):
1139                 return outer_mobj.group(0)
1140             key = outer_mobj.group('key')
1141             mobj = re.match(INTERNAL_FORMAT_RE, key)
1142             initial_field = mobj.group('fields') if mobj else ''
1143             value, replacement, default = None, None, na
1144             while mobj:
1145                 mobj = mobj.groupdict()
1146                 default = mobj['default'] if mobj['default'] is not None else default
1147                 value = get_value(mobj)
1148                 replacement = mobj['replacement']
1149                 if value is None and mobj['alternate']:
1150                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1151                 else:
1152                     break
1153
1154             fmt = outer_mobj.group('format')
1155             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1156                 fmt = '0{:d}d'.format(field_size_compat_map[key])
1157
1158             value = default if value is None else value if replacement is None else replacement
1159
1160             flags = outer_mobj.group('conversion') or ''
1161             str_fmt = f'{fmt[:-1]}s'
1162             if fmt[-1] == 'l':  # list
1163                 delim = '\n' if '#' in flags else ', '
1164                 value, fmt = delim.join(variadic(value, allowed_types=(str, bytes))), str_fmt
1165             elif fmt[-1] == 'j':  # json
1166                 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
1167             elif fmt[-1] == 'q':  # quoted
1168                 value = map(str, variadic(value) if '#' in flags else [value])
1169                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1170             elif fmt[-1] == 'B':  # bytes
1171                 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1172                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1173             elif fmt[-1] == 'U':  # unicode normalized
1174                 value, fmt = unicodedata.normalize(
1175                     # "+" = compatibility equivalence, "#" = NFD
1176                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1177                     value), str_fmt
1178             elif fmt[-1] == 'D':  # decimal suffix
1179                 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1180                 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1181                                               factor=1024 if '#' in flags else 1000)
1182             elif fmt[-1] == 'S':  # filename sanitization
1183                 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
1184             elif fmt[-1] == 'c':
1185                 if value:
1186                     value = str(value)[0]
1187                 else:
1188                     fmt = str_fmt
1189             elif fmt[-1] not in 'rs':  # numeric
1190                 value = float_or_none(value)
1191                 if value is None:
1192                     value, fmt = default, 's'
1193
1194             if sanitize:
1195                 if fmt[-1] == 'r':
1196                     # If value is an object, sanitize might convert it to a string
1197                     # So we convert it to repr first
1198                     value, fmt = repr(value), str_fmt
1199                 if fmt[-1] in 'csr':
1200                     value = sanitizer(initial_field, value)
1201
1202             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1203             TMPL_DICT[key] = value
1204             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1205
1206         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1207
1208     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1209         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1210         return self.escape_outtmpl(outtmpl) % info_dict
1211
1212     def _prepare_filename(self, info_dict, tmpl_type='default'):
1213         try:
1214             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1215             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1216
1217             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1218             if filename and force_ext is not None:
1219                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1220
1221             # https://github.com/blackjack4494/youtube-dlc/issues/85
1222             trim_file_name = self.params.get('trim_file_name', False)
1223             if trim_file_name:
1224                 no_ext, *ext = filename.rsplit('.', 2)
1225                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1226
1227             return filename
1228         except ValueError as err:
1229             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1230             return None
1231
1232     def prepare_filename(self, info_dict, dir_type='', warn=False):
1233         """Generate the output filename."""
1234
1235         filename = self._prepare_filename(info_dict, dir_type or 'default')
1236         if not filename and dir_type not in ('', 'temp'):
1237             return ''
1238
1239         if warn:
1240             if not self.params.get('paths'):
1241                 pass
1242             elif filename == '-':
1243                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1244             elif os.path.isabs(filename):
1245                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1246         if filename == '-' or not filename:
1247             return filename
1248
1249         return self.get_output_path(dir_type, filename)
1250
1251     def _match_entry(self, info_dict, incomplete=False, silent=False):
1252         """ Returns None if the file should be downloaded """
1253
1254         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1255
1256         def check_filter():
1257             if 'title' in info_dict:
1258                 # This can happen when we're just evaluating the playlist
1259                 title = info_dict['title']
1260                 matchtitle = self.params.get('matchtitle', False)
1261                 if matchtitle:
1262                     if not re.search(matchtitle, title, re.IGNORECASE):
1263                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1264                 rejecttitle = self.params.get('rejecttitle', False)
1265                 if rejecttitle:
1266                     if re.search(rejecttitle, title, re.IGNORECASE):
1267                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1268             date = info_dict.get('upload_date')
1269             if date is not None:
1270                 dateRange = self.params.get('daterange', DateRange())
1271                 if date not in dateRange:
1272                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1273             view_count = info_dict.get('view_count')
1274             if view_count is not None:
1275                 min_views = self.params.get('min_views')
1276                 if min_views is not None and view_count < min_views:
1277                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1278                 max_views = self.params.get('max_views')
1279                 if max_views is not None and view_count > max_views:
1280                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1281             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1282                 return 'Skipping "%s" because it is age restricted' % video_title
1283
1284             match_filter = self.params.get('match_filter')
1285             if match_filter is not None:
1286                 try:
1287                     ret = match_filter(info_dict, incomplete=incomplete)
1288                 except TypeError:
1289                     # For backward compatibility
1290                     ret = None if incomplete else match_filter(info_dict)
1291                 if ret is not None:
1292                     return ret
1293             return None
1294
1295         if self.in_download_archive(info_dict):
1296             reason = '%s has already been recorded in the archive' % video_title
1297             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1298         else:
1299             reason = check_filter()
1300             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1301         if reason is not None:
1302             if not silent:
1303                 self.to_screen('[download] ' + reason)
1304             if self.params.get(break_opt, False):
1305                 raise break_err()
1306         return reason
1307
1308     @staticmethod
1309     def add_extra_info(info_dict, extra_info):
1310         '''Set the keys from extra_info in info dict if they are missing'''
1311         for key, value in extra_info.items():
1312             info_dict.setdefault(key, value)
1313
1314     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1315                      process=True, force_generic_extractor=False):
1316         """
1317         Return a list with a dictionary for each video extracted.
1318
1319         Arguments:
1320         url -- URL to extract
1321
1322         Keyword arguments:
1323         download -- whether to download videos during extraction
1324         ie_key -- extractor key hint
1325         extra_info -- dictionary containing the extra values to add to each result
1326         process -- whether to resolve all unresolved references (URLs, playlist items),
1327             must be True for download to work.
1328         force_generic_extractor -- force using the generic extractor
1329         """
1330
1331         if extra_info is None:
1332             extra_info = {}
1333
1334         if not ie_key and force_generic_extractor:
1335             ie_key = 'Generic'
1336
1337         if ie_key:
1338             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1339         else:
1340             ies = self._ies
1341
1342         for ie_key, ie in ies.items():
1343             if not ie.suitable(url):
1344                 continue
1345
1346             if not ie.working():
1347                 self.report_warning('The program functionality for this site has been marked as broken, '
1348                                     'and will probably not work.')
1349
1350             temp_id = ie.get_temp_id(url)
1351             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1352                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1353                 if self.params.get('break_on_existing', False):
1354                     raise ExistingVideoReached()
1355                 break
1356             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1357         else:
1358             self.report_error('no suitable InfoExtractor for URL %s' % url)
1359
1360     def __handle_extraction_exceptions(func):
1361         @functools.wraps(func)
1362         def wrapper(self, *args, **kwargs):
1363             while True:
1364                 try:
1365                     return func(self, *args, **kwargs)
1366                 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1367                     raise
1368                 except ReExtractInfo as e:
1369                     if e.expected:
1370                         self.to_screen(f'{e}; Re-extracting data')
1371                     else:
1372                         self.to_stderr('\r')
1373                         self.report_warning(f'{e}; Re-extracting data')
1374                     continue
1375                 except GeoRestrictedError as e:
1376                     msg = e.msg
1377                     if e.countries:
1378                         msg += '\nThis video is available in %s.' % ', '.join(
1379                             map(ISO3166Utils.short2full, e.countries))
1380                     msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1381                     self.report_error(msg)
1382                 except ExtractorError as e:  # An error we somewhat expected
1383                     self.report_error(str(e), e.format_traceback())
1384                 except Exception as e:
1385                     if self.params.get('ignoreerrors'):
1386                         self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1387                     else:
1388                         raise
1389                 break
1390         return wrapper
1391
1392     def _wait_for_video(self, ie_result):
1393         if (not self.params.get('wait_for_video')
1394                 or ie_result.get('_type', 'video') != 'video'
1395                 or ie_result.get('formats') or ie_result.get('url')):
1396             return
1397
1398         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1399         last_msg = ''
1400
1401         def progress(msg):
1402             nonlocal last_msg
1403             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1404             last_msg = msg
1405
1406         min_wait, max_wait = self.params.get('wait_for_video')
1407         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1408         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1409             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1410             self.report_warning('Release time of video is not known')
1411         elif (diff or 0) <= 0:
1412             self.report_warning('Video should already be available according to extracted info')
1413         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1414         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1415
1416         wait_till = time.time() + diff
1417         try:
1418             while True:
1419                 diff = wait_till - time.time()
1420                 if diff <= 0:
1421                     progress('')
1422                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1423                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1424                 time.sleep(1)
1425         except KeyboardInterrupt:
1426             progress('')
1427             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1428         except BaseException as e:
1429             if not isinstance(e, ReExtractInfo):
1430                 self.to_screen('')
1431             raise
1432
1433     @__handle_extraction_exceptions
1434     def __extract_info(self, url, ie, download, extra_info, process):
1435         ie_result = ie.extract(url)
1436         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1437             return
1438         if isinstance(ie_result, list):
1439             # Backwards compatibility: old IE result format
1440             ie_result = {
1441                 '_type': 'compat_list',
1442                 'entries': ie_result,
1443             }
1444         if extra_info.get('original_url'):
1445             ie_result.setdefault('original_url', extra_info['original_url'])
1446         self.add_default_extra_info(ie_result, ie, url)
1447         if process:
1448             self._wait_for_video(ie_result)
1449             return self.process_ie_result(ie_result, download, extra_info)
1450         else:
1451             return ie_result
1452
1453     def add_default_extra_info(self, ie_result, ie, url):
1454         if url is not None:
1455             self.add_extra_info(ie_result, {
1456                 'webpage_url': url,
1457                 'original_url': url,
1458                 'webpage_url_basename': url_basename(url),
1459                 'webpage_url_domain': get_domain(url),
1460             })
1461         if ie is not None:
1462             self.add_extra_info(ie_result, {
1463                 'extractor': ie.IE_NAME,
1464                 'extractor_key': ie.ie_key(),
1465             })
1466
1467     def process_ie_result(self, ie_result, download=True, extra_info=None):
1468         """
1469         Take the result of the ie(may be modified) and resolve all unresolved
1470         references (URLs, playlist items).
1471
1472         It will also download the videos if 'download'.
1473         Returns the resolved ie_result.
1474         """
1475         if extra_info is None:
1476             extra_info = {}
1477         result_type = ie_result.get('_type', 'video')
1478
1479         if result_type in ('url', 'url_transparent'):
1480             ie_result['url'] = sanitize_url(ie_result['url'])
1481             if ie_result.get('original_url'):
1482                 extra_info.setdefault('original_url', ie_result['original_url'])
1483
1484             extract_flat = self.params.get('extract_flat', False)
1485             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1486                     or extract_flat is True):
1487                 info_copy = ie_result.copy()
1488                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1489                 if ie and not ie_result.get('id'):
1490                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1491                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1492                 self.add_extra_info(info_copy, extra_info)
1493                 info_copy, _ = self.pre_process(info_copy)
1494                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1495                 if self.params.get('force_write_download_archive', False):
1496                     self.record_download_archive(info_copy)
1497                 return ie_result
1498
1499         if result_type == 'video':
1500             self.add_extra_info(ie_result, extra_info)
1501             ie_result = self.process_video_result(ie_result, download=download)
1502             additional_urls = (ie_result or {}).get('additional_urls')
1503             if additional_urls:
1504                 # TODO: Improve MetadataParserPP to allow setting a list
1505                 if isinstance(additional_urls, compat_str):
1506                     additional_urls = [additional_urls]
1507                 self.to_screen(
1508                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1509                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1510                 ie_result['additional_entries'] = [
1511                     self.extract_info(
1512                         url, download, extra_info=extra_info,
1513                         force_generic_extractor=self.params.get('force_generic_extractor'))
1514                     for url in additional_urls
1515                 ]
1516             return ie_result
1517         elif result_type == 'url':
1518             # We have to add extra_info to the results because it may be
1519             # contained in a playlist
1520             return self.extract_info(
1521                 ie_result['url'], download,
1522                 ie_key=ie_result.get('ie_key'),
1523                 extra_info=extra_info)
1524         elif result_type == 'url_transparent':
1525             # Use the information from the embedding page
1526             info = self.extract_info(
1527                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1528                 extra_info=extra_info, download=False, process=False)
1529
1530             # extract_info may return None when ignoreerrors is enabled and
1531             # extraction failed with an error, don't crash and return early
1532             # in this case
1533             if not info:
1534                 return info
1535
1536             force_properties = dict(
1537                 (k, v) for k, v in ie_result.items() if v is not None)
1538             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1539                 if f in force_properties:
1540                     del force_properties[f]
1541             new_result = info.copy()
1542             new_result.update(force_properties)
1543
1544             # Extracted info may not be a video result (i.e.
1545             # info.get('_type', 'video') != video) but rather an url or
1546             # url_transparent. In such cases outer metadata (from ie_result)
1547             # should be propagated to inner one (info). For this to happen
1548             # _type of info should be overridden with url_transparent. This
1549             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1550             if new_result.get('_type') == 'url':
1551                 new_result['_type'] = 'url_transparent'
1552
1553             return self.process_ie_result(
1554                 new_result, download=download, extra_info=extra_info)
1555         elif result_type in ('playlist', 'multi_video'):
1556             # Protect from infinite recursion due to recursively nested playlists
1557             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1558             webpage_url = ie_result['webpage_url']
1559             if webpage_url in self._playlist_urls:
1560                 self.to_screen(
1561                     '[download] Skipping already downloaded playlist: %s'
1562                     % ie_result.get('title') or ie_result.get('id'))
1563                 return
1564
1565             self._playlist_level += 1
1566             self._playlist_urls.add(webpage_url)
1567             self._sanitize_thumbnails(ie_result)
1568             try:
1569                 return self.__process_playlist(ie_result, download)
1570             finally:
1571                 self._playlist_level -= 1
1572                 if not self._playlist_level:
1573                     self._playlist_urls.clear()
1574         elif result_type == 'compat_list':
1575             self.report_warning(
1576                 'Extractor %s returned a compat_list result. '
1577                 'It needs to be updated.' % ie_result.get('extractor'))
1578
1579             def _fixup(r):
1580                 self.add_extra_info(r, {
1581                     'extractor': ie_result['extractor'],
1582                     'webpage_url': ie_result['webpage_url'],
1583                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1584                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1585                     'extractor_key': ie_result['extractor_key'],
1586                 })
1587                 return r
1588             ie_result['entries'] = [
1589                 self.process_ie_result(_fixup(r), download, extra_info)
1590                 for r in ie_result['entries']
1591             ]
1592             return ie_result
1593         else:
1594             raise Exception('Invalid result type: %s' % result_type)
1595
1596     def _ensure_dir_exists(self, path):
1597         return make_dir(path, self.report_error)
1598
    def __process_playlist(self, ie_result, download):
        """Select, filter and process the entries of a playlist result.

        The entries to handle are chosen with the 'playlist_items' param or,
        when that is unset, 'playliststart'/'playlistend'. Unless simulating,
        playlist-level files (info json, description, thumbnails) are written
        first. Each selected entry is then passed through _match_entry() and
        processed, and ie_result['entries'] is replaced by the results.

        Returns the (post-processed) ie_result, or None when writing the
        playlist info json or description returns None.
        Raises EntryNotInPlaylist when ie_result has no 'entries' or when an
        entry is missing from a result that carries 'requested_entries'.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Sentinel marking positions that 'requested_entries' did not cover
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            def fill_missing_entries(entries, indices):
                # Put each entry at its 1-based index, leaving the sentinel
                # in every other slot
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                # Expand a spec such as '1,3-5' into 1, 3, 4, 5
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        # For a list the total is known now; the remaining '%d' placeholder
        # is filled with the number of selected entries later
        msg = (
            'Downloading %d videos' if not isinstance(ie_entries, list)
            else 'Collected %d videos; downloading %%d of them' % len(ie_entries))

        if isinstance(ie_entries, list):
            def get_entry(i):
                return ie_entries[i - 1]
        else:
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)

            def get_entry(i):
                # Indexing a lazy container may run extraction code, so it
                # goes through the extraction exception handler
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        entries = []
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    break
            # With explicit playlist items, an out-of-range index is kept as
            # None so that positions stay aligned with playlistitems
            entries.append(entry)
            try:
                if entry is not None:
                    # Return value ignored; only the break exceptions matter
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
            # Playlist-level fields used to evaluate the playlist file
            # templates; values already in ie_result take precedence
            ie_copy = {
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': 0,
                'n_entries': n_entries,
            }
            ie_copy.update(dict(ie_result))

            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        # A falsy 'skip_playlist_after_errors' means no limit
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            if 'playlist-index' in self.params.get('compat_opts', []):
                # With this compat option the index is taken from the
                # position after re-ordering instead of the saved one
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # A falsy result counts as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        for tmpl in self.params['forceprint'].get('playlist', []):
            self._forceprint(tmpl, ie_result)

        for pp in self._pps['playlist']:
            ie_result = self.run_pp(pp, ie_result)

        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
1777
1778     @__handle_extraction_exceptions
1779     def __process_iterable_entry(self, entry, download, extra_info):
1780         return self.process_ie_result(
1781             entry, download=download, extra_info=extra_info)
1782
1783     def _build_format_filter(self, filter_spec):
1784         " Returns a function to filter the formats according to the filter_spec "
1785
1786         OPERATORS = {
1787             '<': operator.lt,
1788             '<=': operator.le,
1789             '>': operator.gt,
1790             '>=': operator.ge,
1791             '=': operator.eq,
1792             '!=': operator.ne,
1793         }
1794         operator_rex = re.compile(r'''(?x)\s*
1795             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1796             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1797             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1798             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1799         m = operator_rex.fullmatch(filter_spec)
1800         if m:
1801             try:
1802                 comparison_value = int(m.group('value'))
1803             except ValueError:
1804                 comparison_value = parse_filesize(m.group('value'))
1805                 if comparison_value is None:
1806                     comparison_value = parse_filesize(m.group('value') + 'B')
1807                 if comparison_value is None:
1808                     raise ValueError(
1809                         'Invalid value %r in format specification %r' % (
1810                             m.group('value'), filter_spec))
1811             op = OPERATORS[m.group('op')]
1812
1813         if not m:
1814             STR_OPERATORS = {
1815                 '=': operator.eq,
1816                 '^=': lambda attr, value: attr.startswith(value),
1817                 '$=': lambda attr, value: attr.endswith(value),
1818                 '*=': lambda attr, value: value in attr,
1819             }
1820             str_operator_rex = re.compile(r'''(?x)\s*
1821                 (?P<key>[a-zA-Z0-9._-]+)\s*
1822                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1823                 (?P<value>[a-zA-Z0-9._-]+)\s*
1824                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1825             m = str_operator_rex.fullmatch(filter_spec)
1826             if m:
1827                 comparison_value = m.group('value')
1828                 str_op = STR_OPERATORS[m.group('op')]
1829                 if m.group('negation'):
1830                     op = lambda attr, value: not str_op(attr, value)
1831                 else:
1832                     op = str_op
1833
1834         if not m:
1835             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1836
1837         def _filter(f):
1838             actual_value = f.get(m.group('key'))
1839             if actual_value is None:
1840                 return m.group('none_inclusive')
1841             return op(actual_value, comparison_value)
1842         return _filter
1843
1844     def _check_formats(self, formats):
1845         for f in formats:
1846             self.to_screen('[info] Testing format %s' % f['format_id'])
1847             path = self.get_output_path('temp')
1848             if not self._ensure_dir_exists(f'{path}/'):
1849                 continue
1850             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1851             temp_file.close()
1852             try:
1853                 success, _ = self.dl(temp_file.name, f, test=True)
1854             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1855                 success = False
1856             finally:
1857                 if os.path.exists(temp_file.name):
1858                     try:
1859                         os.remove(temp_file.name)
1860                     except OSError:
1861                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1862             if success:
1863                 yield f
1864             else:
1865                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1866
1867     def _default_format_spec(self, info_dict, download=True):
1868
1869         def can_merge():
1870             merger = FFmpegMergerPP(self)
1871             return merger.available and merger.can_merge()
1872
1873         prefer_best = (
1874             not self.params.get('simulate')
1875             and download
1876             and (
1877                 not can_merge()
1878                 or info_dict.get('is_live', False)
1879                 or self.outtmpl_dict['default'] == '-'))
1880         compat = (
1881             prefer_best
1882             or self.params.get('allow_multiple_audio_streams', False)
1883             or 'format-spec' in self.params.get('compat_opts', []))
1884
1885         return (
1886             'best/bestvideo+bestaudio' if prefer_best
1887             else 'bestvideo*+bestaudio/best' if not compat
1888             else 'bestvideo+bestaudio/best')
1889
1890     def build_format_selector(self, format_spec):
1891         def syntax_error(note, start):
1892             message = (
1893                 'Invalid format specification: '
1894                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1895             return SyntaxError(message)
1896
1897         PICKFIRST = 'PICKFIRST'
1898         MERGE = 'MERGE'
1899         SINGLE = 'SINGLE'
1900         GROUP = 'GROUP'
1901         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1902
1903         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1904                                   'video': self.params.get('allow_multiple_video_streams', False)}
1905
1906         check_formats = self.params.get('check_formats') == 'selected'
1907
1908         def _parse_filter(tokens):
1909             filter_parts = []
1910             for type, string, start, _, _ in tokens:
1911                 if type == tokenize.OP and string == ']':
1912                     return ''.join(filter_parts)
1913                 else:
1914                     filter_parts.append(string)
1915
1916         def _remove_unused_ops(tokens):
1917             # Remove operators that we don't use and join them with the surrounding strings
1918             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1919             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1920             last_string, last_start, last_end, last_line = None, None, None, None
1921             for type, string, start, end, line in tokens:
1922                 if type == tokenize.OP and string == '[':
1923                     if last_string:
1924                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1925                         last_string = None
1926                     yield type, string, start, end, line
1927                     # everything inside brackets will be handled by _parse_filter
1928                     for type, string, start, end, line in tokens:
1929                         yield type, string, start, end, line
1930                         if type == tokenize.OP and string == ']':
1931                             break
1932                 elif type == tokenize.OP and string in ALLOWED_OPS:
1933                     if last_string:
1934                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1935                         last_string = None
1936                     yield type, string, start, end, line
1937                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1938                     if not last_string:
1939                         last_string = string
1940                         last_start = start
1941                         last_end = end
1942                     else:
1943                         last_string += string
1944             if last_string:
1945                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1946
1947         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1948             selectors = []
1949             current_selector = None
1950             for type, string, start, _, _ in tokens:
1951                 # ENCODING is only defined in python 3.x
1952                 if type == getattr(tokenize, 'ENCODING', None):
1953                     continue
1954                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1955                     current_selector = FormatSelector(SINGLE, string, [])
1956                 elif type == tokenize.OP:
1957                     if string == ')':
1958                         if not inside_group:
1959                             # ')' will be handled by the parentheses group
1960                             tokens.restore_last_token()
1961                         break
1962                     elif inside_merge and string in ['/', ',']:
1963                         tokens.restore_last_token()
1964                         break
1965                     elif inside_choice and string == ',':
1966                         tokens.restore_last_token()
1967                         break
1968                     elif string == ',':
1969                         if not current_selector:
1970                             raise syntax_error('"," must follow a format selector', start)
1971                         selectors.append(current_selector)
1972                         current_selector = None
1973                     elif string == '/':
1974                         if not current_selector:
1975                             raise syntax_error('"/" must follow a format selector', start)
1976                         first_choice = current_selector
1977                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1978                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1979                     elif string == '[':
1980                         if not current_selector:
1981                             current_selector = FormatSelector(SINGLE, 'best', [])
1982                         format_filter = _parse_filter(tokens)
1983                         current_selector.filters.append(format_filter)
1984                     elif string == '(':
1985                         if current_selector:
1986                             raise syntax_error('Unexpected "("', start)
1987                         group = _parse_format_selection(tokens, inside_group=True)
1988                         current_selector = FormatSelector(GROUP, group, [])
1989                     elif string == '+':
1990                         if not current_selector:
1991                             raise syntax_error('Unexpected "+"', start)
1992                         selector_1 = current_selector
1993                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1994                         if not selector_2:
1995                             raise syntax_error('Expected a selector', start)
1996                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1997                     else:
1998                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1999                 elif type == tokenize.ENDMARKER:
2000                     break
2001             if current_selector:
2002                 selectors.append(current_selector)
2003             return selectors
2004
2005         def _merge(formats_pair):
2006             format_1, format_2 = formats_pair
2007
2008             formats_info = []
2009             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2010             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2011
2012             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2013                 get_no_more = {'video': False, 'audio': False}
2014                 for (i, fmt_info) in enumerate(formats_info):
2015                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2016                         formats_info.pop(i)
2017                         continue
2018                     for aud_vid in ['audio', 'video']:
2019                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2020                             if get_no_more[aud_vid]:
2021                                 formats_info.pop(i)
2022                                 break
2023                             get_no_more[aud_vid] = True
2024
2025             if len(formats_info) == 1:
2026                 return formats_info[0]
2027
2028             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2029             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2030
2031             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2032             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2033
2034             output_ext = self.params.get('merge_output_format')
2035             if not output_ext:
2036                 if the_only_video:
2037                     output_ext = the_only_video['ext']
2038                 elif the_only_audio and not video_fmts:
2039                     output_ext = the_only_audio['ext']
2040                 else:
2041                     output_ext = 'mkv'
2042
2043             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2044
2045             new_dict = {
2046                 'requested_formats': formats_info,
2047                 'format': '+'.join(filtered('format')),
2048                 'format_id': '+'.join(filtered('format_id')),
2049                 'ext': output_ext,
2050                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2051                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2052                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2053                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2054                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2055             }
2056
2057             if the_only_video:
2058                 new_dict.update({
2059                     'width': the_only_video.get('width'),
2060                     'height': the_only_video.get('height'),
2061                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2062                     'fps': the_only_video.get('fps'),
2063                     'dynamic_range': the_only_video.get('dynamic_range'),
2064                     'vcodec': the_only_video.get('vcodec'),
2065                     'vbr': the_only_video.get('vbr'),
2066                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2067                 })
2068
2069             if the_only_audio:
2070                 new_dict.update({
2071                     'acodec': the_only_audio.get('acodec'),
2072                     'abr': the_only_audio.get('abr'),
2073                     'asr': the_only_audio.get('asr'),
2074                 })
2075
2076             return new_dict
2077
2078         def _check_formats(formats):
2079             if not check_formats:
2080                 yield from formats
2081                 return
2082             yield from self._check_formats(formats)
2083
2084         def _build_selector_function(selector):
2085             if isinstance(selector, list):  # ,
2086                 fs = [_build_selector_function(s) for s in selector]
2087
2088                 def selector_function(ctx):
2089                     for f in fs:
2090                         yield from f(ctx)
2091                 return selector_function
2092
2093             elif selector.type == GROUP:  # ()
2094                 selector_function = _build_selector_function(selector.selector)
2095
2096             elif selector.type == PICKFIRST:  # /
2097                 fs = [_build_selector_function(s) for s in selector.selector]
2098
2099                 def selector_function(ctx):
2100                     for f in fs:
2101                         picked_formats = list(f(ctx))
2102                         if picked_formats:
2103                             return picked_formats
2104                     return []
2105
2106             elif selector.type == MERGE:  # +
2107                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2108
2109                 def selector_function(ctx):
2110                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2111                         yield _merge(pair)
2112
2113             elif selector.type == SINGLE:  # atom
2114                 format_spec = selector.selector or 'best'
2115
2116                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2117                 if format_spec == 'all':
2118                     def selector_function(ctx):
2119                         yield from _check_formats(ctx['formats'][::-1])
2120                 elif format_spec == 'mergeall':
2121                     def selector_function(ctx):
2122                         formats = list(_check_formats(ctx['formats']))
2123                         if not formats:
2124                             return
2125                         merged_format = formats[-1]
2126                         for f in formats[-2::-1]:
2127                             merged_format = _merge((merged_format, f))
2128                         yield merged_format
2129
2130                 else:
2131                     format_fallback, format_reverse, format_idx = False, True, 1
2132                     mobj = re.match(
2133                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2134                         format_spec)
2135                     if mobj is not None:
2136                         format_idx = int_or_none(mobj.group('n'), default=1)
2137                         format_reverse = mobj.group('bw')[0] == 'b'
2138                         format_type = (mobj.group('type') or [None])[0]
2139                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2140                         format_modified = mobj.group('mod') is not None
2141
2142                         format_fallback = not format_type and not format_modified  # for b, w
2143                         _filter_f = (
2144                             (lambda f: f.get('%scodec' % format_type) != 'none')
2145                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2146                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2147                             if format_type  # bv, ba, wv, wa
2148                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2149                             if not format_modified  # b, w
2150                             else lambda f: True)  # b*, w*
2151                         filter_f = lambda f: _filter_f(f) and (
2152                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2153                     else:
2154                         if format_spec in self._format_selection_exts['audio']:
2155                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2156                         elif format_spec in self._format_selection_exts['video']:
2157                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2158                         elif format_spec in self._format_selection_exts['storyboards']:
2159                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2160                         else:
2161                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2162
2163                     def selector_function(ctx):
2164                         formats = list(ctx['formats'])
2165                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2166                         if format_fallback and ctx['incomplete_formats'] and not matches:
2167                             # for extractors with incomplete formats (audio only (soundcloud)
2168                             # or video only (imgur)) best/worst will fallback to
2169                             # best/worst {video,audio}-only format
2170                             matches = formats
2171                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2172                         try:
2173                             yield matches[format_idx - 1]
2174                         except IndexError:
2175                             return
2176
2177             filters = [self._build_format_filter(f) for f in selector.filters]
2178
2179             def final_selector(ctx):
2180                 ctx_copy = dict(ctx)
2181                 for _filter in filters:
2182                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2183                 return selector_function(ctx_copy)
2184             return final_selector
2185
2186         stream = io.BytesIO(format_spec.encode('utf-8'))
2187         try:
2188             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2189         except tokenize.TokenError:
2190             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2191
2192         class TokenIterator(object):
2193             def __init__(self, tokens):
2194                 self.tokens = tokens
2195                 self.counter = 0
2196
2197             def __iter__(self):
2198                 return self
2199
2200             def __next__(self):
2201                 if self.counter >= len(self.tokens):
2202                     raise StopIteration()
2203                 value = self.tokens[self.counter]
2204                 self.counter += 1
2205                 return value
2206
2207             next = __next__
2208
2209             def restore_last_token(self):
2210                 self.counter -= 1
2211
2212         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2213         return _build_selector_function(parsed_selector)
2214
2215     def _calc_headers(self, info_dict):
2216         res = std_headers.copy()
2217
2218         add_headers = info_dict.get('http_headers')
2219         if add_headers:
2220             res.update(add_headers)
2221
2222         cookies = self._calc_cookies(info_dict)
2223         if cookies:
2224             res['Cookie'] = cookies
2225
2226         if 'X-Forwarded-For' not in res:
2227             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2228             if x_forwarded_for_ip:
2229                 res['X-Forwarded-For'] = x_forwarded_for_ip
2230
2231         return res
2232
2233     def _calc_cookies(self, info_dict):
2234         pr = sanitized_Request(info_dict['url'])
2235         self.cookiejar.add_cookie_header(pr)
2236         return pr.get_header('Cookie')
2237
2238     def _sort_thumbnails(self, thumbnails):
2239         thumbnails.sort(key=lambda t: (
2240             t.get('preference') if t.get('preference') is not None else -1,
2241             t.get('width') if t.get('width') is not None else -1,
2242             t.get('height') if t.get('height') is not None else -1,
2243             t.get('id') if t.get('id') is not None else '',
2244             t.get('url')))
2245
2246     def _sanitize_thumbnails(self, info_dict):
2247         thumbnails = info_dict.get('thumbnails')
2248         if thumbnails is None:
2249             thumbnail = info_dict.get('thumbnail')
2250             if thumbnail:
2251                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2252         if not thumbnails:
2253             return
2254
2255         def check_thumbnails(thumbnails):
2256             for t in thumbnails:
2257                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2258                 try:
2259                     self.urlopen(HEADRequest(t['url']))
2260                 except network_exceptions as err:
2261                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2262                     continue
2263                 yield t
2264
2265         self._sort_thumbnails(thumbnails)
2266         for i, t in enumerate(thumbnails):
2267             if t.get('id') is None:
2268                 t['id'] = '%d' % i
2269             if t.get('width') and t.get('height'):
2270                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2271             t['url'] = sanitize_url(t['url'])
2272
2273         if self.params.get('check_formats') is True:
2274             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2275         else:
2276             info_dict['thumbnails'] = thumbnails
2277
2278     def process_video_result(self, info_dict, download=True):
2279         assert info_dict.get('_type', 'video') == 'video'
2280         self._num_videos += 1
2281
2282         if 'id' not in info_dict:
2283             raise ExtractorError('Missing "id" field in extractor result')
2284         if 'title' not in info_dict:
2285             raise ExtractorError('Missing "title" field in extractor result',
2286                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2287
2288         def report_force_conversion(field, field_not, conversion):
2289             self.report_warning(
2290                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2291                 % (field, field_not, conversion))
2292
2293         def sanitize_string_field(info, string_field):
2294             field = info.get(string_field)
2295             if field is None or isinstance(field, compat_str):
2296                 return
2297             report_force_conversion(string_field, 'a string', 'string')
2298             info[string_field] = compat_str(field)
2299
2300         def sanitize_numeric_fields(info):
2301             for numeric_field in self._NUMERIC_FIELDS:
2302                 field = info.get(numeric_field)
2303                 if field is None or isinstance(field, compat_numeric_types):
2304                     continue
2305                 report_force_conversion(numeric_field, 'numeric', 'int')
2306                 info[numeric_field] = int_or_none(field)
2307
2308         sanitize_string_field(info_dict, 'id')
2309         sanitize_numeric_fields(info_dict)
2310
2311         if 'playlist' not in info_dict:
2312             # It isn't part of a playlist
2313             info_dict['playlist'] = None
2314             info_dict['playlist_index'] = None
2315
2316         self._sanitize_thumbnails(info_dict)
2317
2318         thumbnail = info_dict.get('thumbnail')
2319         thumbnails = info_dict.get('thumbnails')
2320         if thumbnail:
2321             info_dict['thumbnail'] = sanitize_url(thumbnail)
2322         elif thumbnails:
2323             info_dict['thumbnail'] = thumbnails[-1]['url']
2324
2325         if info_dict.get('display_id') is None and 'id' in info_dict:
2326             info_dict['display_id'] = info_dict['id']
2327
2328         if info_dict.get('duration') is not None:
2329             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2330
2331         for ts_key, date_key in (
2332                 ('timestamp', 'upload_date'),
2333                 ('release_timestamp', 'release_date'),
2334         ):
2335             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2336                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2337                 # see http://bugs.python.org/issue1646728)
2338                 try:
2339                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2340                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2341                 except (ValueError, OverflowError, OSError):
2342                     pass
2343
2344         live_keys = ('is_live', 'was_live')
2345         live_status = info_dict.get('live_status')
2346         if live_status is None:
2347             for key in live_keys:
2348                 if info_dict.get(key) is False:
2349                     continue
2350                 if info_dict.get(key):
2351                     live_status = key
2352                 break
2353             if all(info_dict.get(key) is False for key in live_keys):
2354                 live_status = 'not_live'
2355         if live_status:
2356             info_dict['live_status'] = live_status
2357             for key in live_keys:
2358                 if info_dict.get(key) is None:
2359                     info_dict[key] = (live_status == key)
2360
2361         # Auto generate title fields corresponding to the *_number fields when missing
2362         # in order to always have clean titles. This is very common for TV series.
2363         for field in ('chapter', 'season', 'episode'):
2364             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2365                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2366
2367         for cc_kind in ('subtitles', 'automatic_captions'):
2368             cc = info_dict.get(cc_kind)
2369             if cc:
2370                 for _, subtitle in cc.items():
2371                     for subtitle_format in subtitle:
2372                         if subtitle_format.get('url'):
2373                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2374                         if subtitle_format.get('ext') is None:
2375                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2376
2377         automatic_captions = info_dict.get('automatic_captions')
2378         subtitles = info_dict.get('subtitles')
2379
2380         info_dict['requested_subtitles'] = self.process_subtitles(
2381             info_dict['id'], subtitles, automatic_captions)
2382
2383         if info_dict.get('formats') is None:
2384             # There's only one format available
2385             formats = [info_dict]
2386         else:
2387             formats = info_dict['formats']
2388
2389         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2390         if not self.params.get('allow_unplayable_formats'):
2391             formats = [f for f in formats if not f.get('has_drm')]
2392
2393         if info_dict.get('is_live'):
2394             get_from_start = bool(self.params.get('live_from_start'))
2395             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2396             if not get_from_start:
2397                 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2398
2399         if not formats:
2400             self.raise_no_formats(info_dict)
2401
2402         def is_wellformed(f):
2403             url = f.get('url')
2404             if not url:
2405                 self.report_warning(
2406                     '"url" field is missing or empty - skipping format, '
2407                     'there is an error in extractor')
2408                 return False
2409             if isinstance(url, bytes):
2410                 sanitize_string_field(f, 'url')
2411             return True
2412
2413         # Filter out malformed formats for better extraction robustness
2414         formats = list(filter(is_wellformed, formats))
2415
2416         formats_dict = {}
2417
2418         # We check that all the formats have the format and format_id fields
2419         for i, format in enumerate(formats):
2420             sanitize_string_field(format, 'format_id')
2421             sanitize_numeric_fields(format)
2422             format['url'] = sanitize_url(format['url'])
2423             if not format.get('format_id'):
2424                 format['format_id'] = compat_str(i)
2425             else:
2426                 # Sanitize format_id from characters used in format selector expression
2427                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2428             format_id = format['format_id']
2429             if format_id not in formats_dict:
2430                 formats_dict[format_id] = []
2431             formats_dict[format_id].append(format)
2432
2433         # Make sure all formats have unique format_id
2434         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2435         for format_id, ambiguous_formats in formats_dict.items():
2436             ambigious_id = len(ambiguous_formats) > 1
2437             for i, format in enumerate(ambiguous_formats):
2438                 if ambigious_id:
2439                     format['format_id'] = '%s-%d' % (format_id, i)
2440                 if format.get('ext') is None:
2441                     format['ext'] = determine_ext(format['url']).lower()
2442                 # Ensure there is no conflict between id and ext in format selection
2443                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2444                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2445                     format['format_id'] = 'f%s' % format['format_id']
2446
2447         for i, format in enumerate(formats):
2448             if format.get('format') is None:
2449                 format['format'] = '{id} - {res}{note}'.format(
2450                     id=format['format_id'],
2451                     res=self.format_resolution(format),
2452                     note=format_field(format, 'format_note', ' (%s)'),
2453                 )
2454             if format.get('protocol') is None:
2455                 format['protocol'] = determine_protocol(format)
2456             if format.get('resolution') is None:
2457                 format['resolution'] = self.format_resolution(format, default=None)
2458             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2459                 format['dynamic_range'] = 'SDR'
2460             if (info_dict.get('duration') and format.get('tbr')
2461                     and not format.get('filesize') and not format.get('filesize_approx')):
2462                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2463
2464             # Add HTTP headers, so that external programs can use them from the
2465             # json output
2466             full_format_info = info_dict.copy()
2467             full_format_info.update(format)
2468             format['http_headers'] = self._calc_headers(full_format_info)
2469         # Remove private housekeeping stuff
2470         if '__x_forwarded_for_ip' in info_dict:
2471             del info_dict['__x_forwarded_for_ip']
2472
2473         # TODO Central sorting goes here
2474
2475         if self.params.get('check_formats') is True:
2476             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2477
2478         if not formats or formats[0] is not info_dict:
2479             # only set the 'formats' fields if the original info_dict list them
2480             # otherwise we end up with a circular reference, the first (and unique)
2481             # element in the 'formats' field in info_dict is info_dict itself,
2482             # which can't be exported to json
2483             info_dict['formats'] = formats
2484
2485         info_dict, _ = self.pre_process(info_dict)
2486
2487         # The pre-processors may have modified the formats
2488         formats = info_dict.get('formats', [info_dict])
2489
2490         list_only = self.params.get('simulate') is None and (
2491             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2492         interactive_format_selection = not list_only and self.format_selector == '-'
2493         if self.params.get('list_thumbnails'):
2494             self.list_thumbnails(info_dict)
2495         if self.params.get('listsubtitles'):
2496             if 'automatic_captions' in info_dict:
2497                 self.list_subtitles(
2498                     info_dict['id'], automatic_captions, 'automatic captions')
2499             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2500         if self.params.get('listformats') or interactive_format_selection:
2501             self.list_formats(info_dict)
2502         if list_only:
2503             # Without this printing, -F --print-json will not work
2504             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2505             return
2506
2507         format_selector = self.format_selector
2508         if format_selector is None:
2509             req_format = self._default_format_spec(info_dict, download=download)
2510             self.write_debug('Default format spec: %s' % req_format)
2511             format_selector = self.build_format_selector(req_format)
2512
2513         while True:
2514             if interactive_format_selection:
2515                 req_format = input(
2516                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2517                 try:
2518                     format_selector = self.build_format_selector(req_format)
2519                 except SyntaxError as err:
2520                     self.report_error(err, tb=False, is_error=False)
2521                     continue
2522
2523             # While in format selection we may need to have an access to the original
2524             # format set in order to calculate some metrics or do some processing.
2525             # For now we need to be able to guess whether original formats provided
2526             # by extractor are incomplete or not (i.e. whether extractor provides only
2527             # video-only or audio-only formats) for proper formats selection for
2528             # extractors with such incomplete formats (see
2529             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2530             # Since formats may be filtered during format selection and may not match
2531             # the original formats the results may be incorrect. Thus original formats
2532             # or pre-calculated metrics should be passed to format selection routines
2533             # as well.
2534             # We will pass a context object containing all necessary additional data
2535             # instead of just formats.
2536             # This fixes incorrect format selection issue (see
2537             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2538             incomplete_formats = (
2539                 # All formats are video-only or
2540                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2541                 # all formats are audio-only
2542                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2543
2544             ctx = {
2545                 'formats': formats,
2546                 'incomplete_formats': incomplete_formats,
2547             }
2548
2549             formats_to_download = list(format_selector(ctx))
2550             if interactive_format_selection and not formats_to_download:
2551                 self.report_error('Requested format is not available', tb=False, is_error=False)
2552                 continue
2553             break
2554
2555         if not formats_to_download:
2556             if not self.params.get('ignore_no_formats_error'):
2557                 raise ExtractorError('Requested format is not available', expected=True,
2558                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2559             self.report_warning('Requested format is not available')
2560             # Process what we can, even without any available formats.
2561             formats_to_download = [{}]
2562
2563         best_format = formats_to_download[-1]
2564         if download:
2565             if best_format:
2566                 self.to_screen(
2567                     f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2568                     + ', '.join([f['format_id'] for f in formats_to_download]))
2569             max_downloads_reached = False
2570             for i, fmt in enumerate(formats_to_download):
2571                 formats_to_download[i] = new_info = dict(info_dict)
2572                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2573                 new_info.update(fmt)
2574                 new_info['__original_infodict'] = info_dict
2575                 try:
2576                     self.process_info(new_info)
2577                 except MaxDownloadsReached:
2578                     max_downloads_reached = True
2579                 new_info.pop('__original_infodict')
2580                 # Remove copied info
2581                 for key, val in tuple(new_info.items()):
2582                     if info_dict.get(key) == val:
2583                         new_info.pop(key)
2584                 if max_downloads_reached:
2585                     break
2586
2587             write_archive = set(f.get('_write_download_archive', False) for f in formats_to_download)
2588             assert write_archive.issubset({True, False, 'ignore'})
2589             if True in write_archive and False not in write_archive:
2590                 self.record_download_archive(info_dict)
2591
2592             info_dict['requested_downloads'] = formats_to_download
2593             for pp in self._pps['after_video']:
2594                 info_dict = self.run_pp(pp, info_dict)
2595             if max_downloads_reached:
2596                 raise MaxDownloadsReached()
2597
2598         # We update the info dict with the selected best quality format (backwards compatibility)
2599         info_dict.update(best_format)
2600         return info_dict
2601
2602     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2603         """Select the requested subtitles and their format"""
2604         available_subs = {}
2605         if normal_subtitles and self.params.get('writesubtitles'):
2606             available_subs.update(normal_subtitles)
2607         if automatic_captions and self.params.get('writeautomaticsub'):
2608             for lang, cap_info in automatic_captions.items():
2609                 if lang not in available_subs:
2610                     available_subs[lang] = cap_info
2611
2612         if (not self.params.get('writesubtitles') and not
2613                 self.params.get('writeautomaticsub') or not
2614                 available_subs):
2615             return None
2616
2617         all_sub_langs = available_subs.keys()
2618         if self.params.get('allsubtitles', False):
2619             requested_langs = all_sub_langs
2620         elif self.params.get('subtitleslangs', False):
2621             # A list is used so that the order of languages will be the same as
2622             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2623             requested_langs = []
2624             for lang_re in self.params.get('subtitleslangs'):
2625                 if lang_re == 'all':
2626                     requested_langs.extend(all_sub_langs)
2627                     continue
2628                 discard = lang_re[0] == '-'
2629                 if discard:
2630                     lang_re = lang_re[1:]
2631                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2632                 if discard:
2633                     for lang in current_langs:
2634                         while lang in requested_langs:
2635                             requested_langs.remove(lang)
2636                 else:
2637                     requested_langs.extend(current_langs)
2638             requested_langs = orderedSet(requested_langs)
2639         elif 'en' in available_subs:
2640             requested_langs = ['en']
2641         else:
2642             requested_langs = [list(all_sub_langs)[0]]
2643         if requested_langs:
2644             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2645
2646         formats_query = self.params.get('subtitlesformat', 'best')
2647         formats_preference = formats_query.split('/') if formats_query else []
2648         subs = {}
2649         for lang in requested_langs:
2650             formats = available_subs.get(lang)
2651             if formats is None:
2652                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2653                 continue
2654             for ext in formats_preference:
2655                 if ext == 'best':
2656                     f = formats[-1]
2657                     break
2658                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2659                 if matches:
2660                     f = matches[-1]
2661                     break
2662             else:
2663                 f = formats[-1]
2664                 self.report_warning(
2665                     'No subtitle format found matching "%s" for language %s, '
2666                     'using %s' % (formats_query, lang, f['ext']))
2667             subs[lang] = f
2668         return subs
2669
2670     def _forceprint(self, tmpl, info_dict):
2671         mobj = re.match(r'\w+(=?)$', tmpl)
2672         if mobj and mobj.group(1):
2673             tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2674         elif mobj:
2675             tmpl = '%({})s'.format(tmpl)
2676         self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2677
2678     def __forced_printings(self, info_dict, filename, incomplete):
2679         def print_mandatory(field, actual_field=None):
2680             if actual_field is None:
2681                 actual_field = field
2682             if (self.params.get('force%s' % field, False)
2683                     and (not incomplete or info_dict.get(actual_field) is not None)):
2684                 self.to_stdout(info_dict[actual_field])
2685
2686         def print_optional(field):
2687             if (self.params.get('force%s' % field, False)
2688                     and info_dict.get(field) is not None):
2689                 self.to_stdout(info_dict[field])
2690
2691         info_dict = info_dict.copy()
2692         if filename is not None:
2693             info_dict['filename'] = filename
2694         if info_dict.get('requested_formats') is not None:
2695             # For RTMP URLs, also include the playpath
2696             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2697         elif 'url' in info_dict:
2698             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2699
2700         if self.params['forceprint'].get('video') or self.params.get('forcejson'):
2701             self.post_extract(info_dict)
2702         for tmpl in self.params['forceprint'].get('video', []):
2703             self._forceprint(tmpl, info_dict)
2704
2705         print_mandatory('title')
2706         print_mandatory('id')
2707         print_mandatory('url', 'urls')
2708         print_optional('thumbnail')
2709         print_optional('description')
2710         print_optional('filename')
2711         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2712             self.to_stdout(formatSeconds(info_dict['duration']))
2713         print_mandatory('format')
2714
2715         if self.params.get('forcejson'):
2716             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2717
2718     def dl(self, name, info, subtitle=False, test=False):
2719         if not info.get('url'):
2720             self.raise_no_formats(info, True)
2721
2722         if test:
2723             verbose = self.params.get('verbose')
2724             params = {
2725                 'test': True,
2726                 'quiet': self.params.get('quiet') or not verbose,
2727                 'verbose': verbose,
2728                 'noprogress': not verbose,
2729                 'nopart': True,
2730                 'skip_unavailable_fragments': False,
2731                 'keep_fragments': False,
2732                 'overwrites': True,
2733                 '_no_ytdl_file': True,
2734             }
2735         else:
2736             params = self.params
2737         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2738         if not test:
2739             for ph in self._progress_hooks:
2740                 fd.add_progress_hook(ph)
2741             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2742             self.write_debug('Invoking downloader on "%s"' % urls)
2743
2744         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2745         # But it may contain objects that are not deep-copyable
2746         new_info = self._copy_infodict(info)
2747         if new_info.get('http_headers') is None:
2748             new_info['http_headers'] = self._calc_headers(new_info)
2749         return fd.download(name, new_info, subtitle)
2750
2751     def process_info(self, info_dict):
2752         """Process a single resolved IE result. (Modified it in-place)"""
2753
2754         assert info_dict.get('_type', 'video') == 'video'
2755         original_infodict = info_dict
2756
2757         # TODO: backward compatibility, to be removed
2758         info_dict['fulltitle'] = info_dict['title']
2759
2760         if 'format' not in info_dict and 'ext' in info_dict:
2761             info_dict['format'] = info_dict['ext']
2762
2763         if self._match_entry(info_dict) is not None:
2764             info_dict['_write_download_archive'] = 'ignore'
2765             return
2766
2767         self.post_extract(info_dict)
2768         self._num_downloads += 1
2769
2770         # info_dict['_filename'] needs to be set for backward compatibility
2771         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2772         temp_filename = self.prepare_filename(info_dict, 'temp')
2773         files_to_move = {}
2774
2775         # Forced printings
2776         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2777
2778         if self.params.get('simulate'):
2779             info_dict['_write_download_archive'] = self.params.get('force_write_download_archive')
2780             return
2781
2782         if full_filename is None:
2783             return
2784         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2785             return
2786         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2787             return
2788
2789         if self._write_description('video', info_dict,
2790                                    self.prepare_filename(info_dict, 'description')) is None:
2791             return
2792
2793         sub_files = self._write_subtitles(info_dict, temp_filename)
2794         if sub_files is None:
2795             return
2796         files_to_move.update(dict(sub_files))
2797
2798         thumb_files = self._write_thumbnails(
2799             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2800         if thumb_files is None:
2801             return
2802         files_to_move.update(dict(thumb_files))
2803
2804         infofn = self.prepare_filename(info_dict, 'infojson')
2805         _infojson_written = self._write_info_json('video', info_dict, infofn)
2806         if _infojson_written:
2807             info_dict['infojson_filename'] = infofn
2808             # For backward compatibility, even though it was a private field
2809             info_dict['__infojson_filename'] = infofn
2810         elif _infojson_written is None:
2811             return
2812
2813         # Note: Annotations are deprecated
2814         annofn = None
2815         if self.params.get('writeannotations', False):
2816             annofn = self.prepare_filename(info_dict, 'annotation')
2817         if annofn:
2818             if not self._ensure_dir_exists(encodeFilename(annofn)):
2819                 return
2820             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2821                 self.to_screen('[info] Video annotations are already present')
2822             elif not info_dict.get('annotations'):
2823                 self.report_warning('There are no annotations to write.')
2824             else:
2825                 try:
2826                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2827                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2828                         annofile.write(info_dict['annotations'])
2829                 except (KeyError, TypeError):
2830                     self.report_warning('There are no annotations to write.')
2831                 except (OSError, IOError):
2832                     self.report_error('Cannot write annotations file: ' + annofn)
2833                     return
2834
2835         # Write internet shortcut files
2836         def _write_link_file(link_type):
2837             if 'webpage_url' not in info_dict:
2838                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2839                 return False
2840             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2841             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2842                 return False
2843             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2844                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2845                 return True
2846             try:
2847                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2848                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2849                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2850                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2851                     if link_type == 'desktop':
2852                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2853                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2854             except (OSError, IOError):
2855                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2856                 return False
2857             return True
2858
2859         write_links = {
2860             'url': self.params.get('writeurllink'),
2861             'webloc': self.params.get('writewebloclink'),
2862             'desktop': self.params.get('writedesktoplink'),
2863         }
2864         if self.params.get('writelink'):
2865             link_type = ('webloc' if sys.platform == 'darwin'
2866                          else 'desktop' if sys.platform.startswith('linux')
2867                          else 'url')
2868             write_links[link_type] = True
2869
2870         if any(should_write and not _write_link_file(link_type)
2871                for link_type, should_write in write_links.items()):
2872             return
2873
2874         def replace_info_dict(new_info):
2875             nonlocal info_dict
2876             if new_info == info_dict:
2877                 return
2878             info_dict.clear()
2879             info_dict.update(new_info)
2880
2881         try:
2882             new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2883             replace_info_dict(new_info)
2884         except PostProcessingError as err:
2885             self.report_error('Preprocessing: %s' % str(err))
2886             return
2887
2888         if self.params.get('skip_download'):
2889             info_dict['filepath'] = temp_filename
2890             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2891             info_dict['__files_to_move'] = files_to_move
2892             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2893             info_dict['_write_download_archive'] = self.params.get('force_write_download_archive')
2894         else:
2895             # Download
2896             info_dict.setdefault('__postprocessors', [])
2897             try:
2898
2899                 def existing_file(*filepaths):
2900                     ext = info_dict.get('ext')
2901                     final_ext = self.params.get('final_ext', ext)
2902                     existing_files = []
2903                     for file in orderedSet(filepaths):
2904                         if final_ext != ext:
2905                             converted = replace_extension(file, final_ext, ext)
2906                             if os.path.exists(encodeFilename(converted)):
2907                                 existing_files.append(converted)
2908                         if os.path.exists(encodeFilename(file)):
2909                             existing_files.append(file)
2910
2911                     if not existing_files or self.params.get('overwrites', False):
2912                         for file in orderedSet(existing_files):
2913                             self.report_file_delete(file)
2914                             os.remove(encodeFilename(file))
2915                         return None
2916
2917                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2918                     return existing_files[0]
2919
2920                 success = True
2921                 if info_dict.get('requested_formats') is not None:
2922
2923                     def compatible_formats(formats):
2924                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2925                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2926                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2927                         if len(video_formats) > 2 or len(audio_formats) > 2:
2928                             return False
2929
2930                         # Check extension
2931                         exts = set(format.get('ext') for format in formats)
2932                         COMPATIBLE_EXTS = (
2933                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2934                             set(('webm',)),
2935                         )
2936                         for ext_sets in COMPATIBLE_EXTS:
2937                             if ext_sets.issuperset(exts):
2938                                 return True
2939                         # TODO: Check acodec/vcodec
2940                         return False
2941
2942                     requested_formats = info_dict['requested_formats']
2943                     old_ext = info_dict['ext']
2944                     if self.params.get('merge_output_format') is None:
2945                         if not compatible_formats(requested_formats):
2946                             info_dict['ext'] = 'mkv'
2947                             self.report_warning(
2948                                 'Requested formats are incompatible for merge and will be merged into mkv')
2949                         if (info_dict['ext'] == 'webm'
2950                                 and info_dict.get('thumbnails')
2951                                 # check with type instead of pp_key, __name__, or isinstance
2952                                 # since we dont want any custom PPs to trigger this
2953                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2954                             info_dict['ext'] = 'mkv'
2955                             self.report_warning(
2956                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2957                     new_ext = info_dict['ext']
2958
2959                     def correct_ext(filename, ext=new_ext):
2960                         if filename == '-':
2961                             return filename
2962                         filename_real_ext = os.path.splitext(filename)[1][1:]
2963                         filename_wo_ext = (
2964                             os.path.splitext(filename)[0]
2965                             if filename_real_ext in (old_ext, new_ext)
2966                             else filename)
2967                         return '%s.%s' % (filename_wo_ext, ext)
2968
2969                     # Ensure filename always has a correct extension for successful merge
2970                     full_filename = correct_ext(full_filename)
2971                     temp_filename = correct_ext(temp_filename)
2972                     dl_filename = existing_file(full_filename, temp_filename)
2973                     info_dict['__real_download'] = False
2974
2975                     downloaded = []
2976                     merger = FFmpegMergerPP(self)
2977
2978                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
2979                     if dl_filename is not None:
2980                         self.report_file_already_downloaded(dl_filename)
2981                     elif fd:
2982                         for f in requested_formats if fd != FFmpegFD else []:
2983                             f['filepath'] = fname = prepend_extension(
2984                                 correct_ext(temp_filename, info_dict['ext']),
2985                                 'f%s' % f['format_id'], info_dict['ext'])
2986                             downloaded.append(fname)
2987                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2988                         success, real_download = self.dl(temp_filename, info_dict)
2989                         info_dict['__real_download'] = real_download
2990                     else:
2991                         if self.params.get('allow_unplayable_formats'):
2992                             self.report_warning(
2993                                 'You have requested merging of multiple formats '
2994                                 'while also allowing unplayable formats to be downloaded. '
2995                                 'The formats won\'t be merged to prevent data corruption.')
2996                         elif not merger.available:
2997                             self.report_warning(
2998                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2999                                 'The formats won\'t be merged.')
3000
3001                         if temp_filename == '-':
3002                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3003                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3004                                       else 'but ffmpeg is not installed')
3005                             self.report_warning(
3006                                 f'You have requested downloading multiple formats to stdout {reason}. '
3007                                 'The formats will be streamed one after the other')
3008                             fname = temp_filename
3009                         for f in requested_formats:
3010                             new_info = dict(info_dict)
3011                             del new_info['requested_formats']
3012                             new_info.update(f)
3013                             if temp_filename != '-':
3014                                 fname = prepend_extension(
3015                                     correct_ext(temp_filename, new_info['ext']),
3016                                     'f%s' % f['format_id'], new_info['ext'])
3017                                 if not self._ensure_dir_exists(fname):
3018                                     return
3019                                 f['filepath'] = fname
3020                                 downloaded.append(fname)
3021                             partial_success, real_download = self.dl(fname, new_info)
3022                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3023                             success = success and partial_success
3024
3025                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3026                         info_dict['__postprocessors'].append(merger)
3027                         info_dict['__files_to_merge'] = downloaded
3028                         # Even if there were no downloads, it is being merged only now
3029                         info_dict['__real_download'] = True
3030                     else:
3031                         for file in downloaded:
3032                             files_to_move[file] = None
3033                 else:
3034                     # Just a single file
3035                     dl_filename = existing_file(full_filename, temp_filename)
3036                     if dl_filename is None or dl_filename == temp_filename:
3037                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3038                         # So we should try to resume the download
3039                         success, real_download = self.dl(temp_filename, info_dict)
3040                         info_dict['__real_download'] = real_download
3041                     else:
3042                         self.report_file_already_downloaded(dl_filename)
3043
3044                 dl_filename = dl_filename or temp_filename
3045                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3046
3047             except network_exceptions as err:
3048                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3049                 return
3050             except (OSError, IOError) as err:
3051                 raise UnavailableVideoError(err)
3052             except (ContentTooShortError, ) as err:
3053                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3054                 return
3055
3056             if success and full_filename != '-':
3057
3058                 def fixup():
3059                     do_fixup = True
3060                     fixup_policy = self.params.get('fixup')
3061                     vid = info_dict['id']
3062
3063                     if fixup_policy in ('ignore', 'never'):
3064                         return
3065                     elif fixup_policy == 'warn':
3066                         do_fixup = False
3067                     elif fixup_policy != 'force':
3068                         assert fixup_policy in ('detect_or_warn', None)
3069                         if not info_dict.get('__real_download'):
3070                             do_fixup = False
3071
3072                     def ffmpeg_fixup(cndn, msg, cls):
3073                         if not cndn:
3074                             return
3075                         if not do_fixup:
3076                             self.report_warning(f'{vid}: {msg}')
3077                             return
3078                         pp = cls(self)
3079                         if pp.available:
3080                             info_dict['__postprocessors'].append(pp)
3081                         else:
3082                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3083
3084                     stretched_ratio = info_dict.get('stretched_ratio')
3085                     ffmpeg_fixup(
3086                         stretched_ratio not in (1, None),
3087                         f'Non-uniform pixel ratio {stretched_ratio}',
3088                         FFmpegFixupStretchedPP)
3089
3090                     ffmpeg_fixup(
3091                         (info_dict.get('requested_formats') is None
3092                          and info_dict.get('container') == 'm4a_dash'
3093                          and info_dict.get('ext') == 'm4a'),
3094                         'writing DASH m4a. Only some players support this container',
3095                         FFmpegFixupM4aPP)
3096
3097                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3098                     downloader = downloader.__name__ if downloader else None
3099
3100                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3101                         ffmpeg_fixup(downloader == 'HlsFD',
3102                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3103                                      FFmpegFixupM3u8PP)
3104                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3105                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3106
3107                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3108                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3109
3110                 fixup()
3111                 try:
3112                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3113                 except PostProcessingError as err:
3114                     self.report_error('Postprocessing: %s' % str(err))
3115                     return
3116                 try:
3117                     for ph in self._post_hooks:
3118                         ph(info_dict['filepath'])
3119                 except Exception as err:
3120                     self.report_error('post hooks: %s' % str(err))
3121                     return
3122                 info_dict['_write_download_archive'] = True
3123
3124         if self.params.get('force_write_download_archive'):
3125             info_dict['_write_download_archive'] = True
3126
3127         # Make sure the info_dict was modified in-place
3128         assert info_dict is original_infodict
3129
3130         max_downloads = self.params.get('max_downloads')
3131         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3132             raise MaxDownloadsReached()
3133
3134     def __download_wrapper(self, func):
3135         @functools.wraps(func)
3136         def wrapper(*args, **kwargs):
3137             try:
3138                 res = func(*args, **kwargs)
3139             except UnavailableVideoError as e:
3140                 self.report_error(e)
3141             except MaxDownloadsReached as e:
3142                 self.to_screen(f'[info] {e}')
3143                 raise
3144             except DownloadCancelled as e:
3145                 self.to_screen(f'[info] {e}')
3146                 if not self.params.get('break_per_url'):
3147                     raise
3148             else:
3149                 if self.params.get('dump_single_json', False):
3150                     self.post_extract(res)
3151                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3152         return wrapper
3153
3154     def download(self, url_list):
3155         """Download a given list of URLs."""
3156         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3157         outtmpl = self.outtmpl_dict['default']
3158         if (len(url_list) > 1
3159                 and outtmpl != '-'
3160                 and '%' not in outtmpl
3161                 and self.params.get('max_downloads') != 1):
3162             raise SameFileError(outtmpl)
3163
3164         for url in url_list:
3165             self.__download_wrapper(self.extract_info)(
3166                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3167
3168         return self._download_retcode
3169
3170     def download_with_info_file(self, info_filename):
3171         with contextlib.closing(fileinput.FileInput(
3172                 [info_filename], mode='r',
3173                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3174             # FileInput doesn't have a read method, we can't call json.load
3175             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3176         try:
3177             self.__download_wrapper(self.process_ie_result)(info, download=True)
3178         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3179             if not isinstance(e, EntryNotInPlaylist):
3180                 self.to_stderr('\r')
3181             webpage_url = info.get('webpage_url')
3182             if webpage_url is not None:
3183                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3184                 return self.download([webpage_url])
3185             else:
3186                 raise
3187         return self._download_retcode
3188
3189     @staticmethod
3190     def sanitize_info(info_dict, remove_private_keys=False):
3191         ''' Sanitize the infodict for converting to json '''
3192         if info_dict is None:
3193             return info_dict
3194         info_dict.setdefault('epoch', int(time.time()))
3195         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3196         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3197         if remove_private_keys:
3198             remove_keys |= {
3199                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3200                 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3201             }
3202             reject = lambda k, v: k not in keep_keys and (
3203                 k.startswith('_') or k in remove_keys or v is None)
3204         else:
3205             reject = lambda k, v: k in remove_keys
3206
3207         def filter_fn(obj):
3208             if isinstance(obj, dict):
3209                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3210             elif isinstance(obj, (list, tuple, set, LazyList)):
3211                 return list(map(filter_fn, obj))
3212             elif obj is None or isinstance(obj, (str, int, float, bool)):
3213                 return obj
3214             else:
3215                 return repr(obj)
3216
3217         return filter_fn(info_dict)
3218
3219     @staticmethod
3220     def filter_requested_info(info_dict, actually_filter=True):
3221         ''' Alias of sanitize_info for backward compatibility '''
3222         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3223
3224     def run_pp(self, pp, infodict):
3225         files_to_delete = []
3226         if '__files_to_move' not in infodict:
3227             infodict['__files_to_move'] = {}
3228         try:
3229             files_to_delete, infodict = pp.run(infodict)
3230         except PostProcessingError as e:
3231             # Must be True and not 'only_download'
3232             if self.params.get('ignoreerrors') is True:
3233                 self.report_error(e)
3234                 return infodict
3235             raise
3236
3237         if not files_to_delete:
3238             return infodict
3239         if self.params.get('keepvideo', False):
3240             for f in files_to_delete:
3241                 infodict['__files_to_move'].setdefault(f, '')
3242         else:
3243             for old_filename in set(files_to_delete):
3244                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3245                 try:
3246                     os.remove(encodeFilename(old_filename))
3247                 except (IOError, OSError):
3248                     self.report_warning('Unable to remove downloaded original file')
3249                 if old_filename in infodict['__files_to_move']:
3250                     del infodict['__files_to_move'][old_filename]
3251         return infodict
3252
3253     @staticmethod
3254     def post_extract(info_dict):
3255         def actual_post_extract(info_dict):
3256             if info_dict.get('_type') in ('playlist', 'multi_video'):
3257                 for video_dict in info_dict.get('entries', {}):
3258                     actual_post_extract(video_dict or {})
3259                 return
3260
3261             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3262             extra = post_extractor().items()
3263             info_dict.update(extra)
3264             info_dict.pop('__post_extractor', None)
3265
3266             original_infodict = info_dict.get('__original_infodict') or {}
3267             original_infodict.update(extra)
3268             original_infodict.pop('__post_extractor', None)
3269
3270         actual_post_extract(info_dict or {})
3271
3272     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3273         info = dict(ie_info)
3274         info['__files_to_move'] = files_to_move or {}
3275         for pp in self._pps[key]:
3276             info = self.run_pp(pp, info)
3277         return info, info.pop('__files_to_move', None)
3278
3279     def post_process(self, filename, info, files_to_move=None):
3280         """Run all the postprocessors on the given file."""
3281         info['filepath'] = filename
3282         info['__files_to_move'] = files_to_move or {}
3283
3284         for pp in info.get('__postprocessors', []) + self._pps['post_process']:
3285             info = self.run_pp(pp, info)
3286         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3287         del info['__files_to_move']
3288         for pp in self._pps['after_move']:
3289             info = self.run_pp(pp, info)
3290         return info
3291
3292     def _make_archive_id(self, info_dict):
3293         video_id = info_dict.get('id')
3294         if not video_id:
3295             return
3296         # Future-proof against any change in case
3297         # and backwards compatibility with prior versions
3298         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3299         if extractor is None:
3300             url = str_or_none(info_dict.get('url'))
3301             if not url:
3302                 return
3303             # Try to find matching extractor for the URL and take its ie_key
3304             for ie_key, ie in self._ies.items():
3305                 if ie.suitable(url):
3306                     extractor = ie_key
3307                     break
3308             else:
3309                 return
3310         return '%s %s' % (extractor.lower(), video_id)
3311
3312     def in_download_archive(self, info_dict):
3313         fn = self.params.get('download_archive')
3314         if fn is None:
3315             return False
3316
3317         vid_id = self._make_archive_id(info_dict)
3318         if not vid_id:
3319             return False  # Incomplete video information
3320
3321         return vid_id in self.archive
3322
3323     def record_download_archive(self, info_dict):
3324         fn = self.params.get('download_archive')
3325         if fn is None:
3326             return
3327         vid_id = self._make_archive_id(info_dict)
3328         assert vid_id
3329         self.write_debug(f'Adding to archive: {vid_id}')
3330         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3331             archive_file.write(vid_id + '\n')
3332         self.archive.add(vid_id)
3333
3334     @staticmethod
3335     def format_resolution(format, default='unknown'):
3336         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3337             return 'audio only'
3338         if format.get('resolution') is not None:
3339             return format['resolution']
3340         if format.get('width') and format.get('height'):
3341             return '%dx%d' % (format['width'], format['height'])
3342         elif format.get('height'):
3343             return '%sp' % format['height']
3344         elif format.get('width'):
3345             return '%dx?' % format['width']
3346         return default
3347
3348     def _format_note(self, fdict):
3349         res = ''
3350         if fdict.get('ext') in ['f4f', 'f4m']:
3351             res += '(unsupported)'
3352         if fdict.get('language'):
3353             if res:
3354                 res += ' '
3355             res += '[%s]' % fdict['language']
3356         if fdict.get('format_note') is not None:
3357             if res:
3358                 res += ' '
3359             res += fdict['format_note']
3360         if fdict.get('tbr') is not None:
3361             if res:
3362                 res += ', '
3363             res += '%4dk' % fdict['tbr']
3364         if fdict.get('container') is not None:
3365             if res:
3366                 res += ', '
3367             res += '%s container' % fdict['container']
3368         if (fdict.get('vcodec') is not None
3369                 and fdict.get('vcodec') != 'none'):
3370             if res:
3371                 res += ', '
3372             res += fdict['vcodec']
3373             if fdict.get('vbr') is not None:
3374                 res += '@'
3375         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3376             res += 'video@'
3377         if fdict.get('vbr') is not None:
3378             res += '%4dk' % fdict['vbr']
3379         if fdict.get('fps') is not None:
3380             if res:
3381                 res += ', '
3382             res += '%sfps' % fdict['fps']
3383         if fdict.get('acodec') is not None:
3384             if res:
3385                 res += ', '
3386             if fdict['acodec'] == 'none':
3387                 res += 'video only'
3388             else:
3389                 res += '%-5s' % fdict['acodec']
3390         elif fdict.get('abr') is not None:
3391             if res:
3392                 res += ', '
3393             res += 'audio'
3394         if fdict.get('abr') is not None:
3395             res += '@%3dk' % fdict['abr']
3396         if fdict.get('asr') is not None:
3397             res += ' (%5dHz)' % fdict['asr']
3398         if fdict.get('filesize') is not None:
3399             if res:
3400                 res += ', '
3401             res += format_bytes(fdict['filesize'])
3402         elif fdict.get('filesize_approx') is not None:
3403             if res:
3404                 res += ', '
3405             res += '~' + format_bytes(fdict['filesize_approx'])
3406         return res
3407
3408     def _list_format_headers(self, *headers):
3409         if self.params.get('listformats_table', True) is not False:
3410             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3411         return headers
3412
3413     def list_formats(self, info_dict):
3414         if not info_dict.get('formats') and not info_dict.get('url'):
3415             self.to_screen('%s has no formats' % info_dict['id'])
3416             return
3417         self.to_screen('[info] Available formats for %s:' % info_dict['id'])
3418
3419         formats = info_dict.get('formats', [info_dict])
3420         new_format = self.params.get('listformats_table', True) is not False
3421         if new_format:
3422             delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3423             table = [
3424                 [
3425                     self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3426                     format_field(f, 'ext'),
3427                     format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3428                     format_field(f, 'fps', '\t%d'),
3429                     format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3430                     delim,
3431                     format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3432                     format_field(f, 'tbr', '\t%dk'),
3433                     shorten_protocol_name(f.get('protocol', '')),
3434                     delim,
3435                     format_field(f, 'vcodec', default='unknown').replace(
3436                         'none',
3437                         'images' if f.get('acodec') == 'none'
3438                         else self._format_screen('audio only', self.Styles.SUPPRESS)),
3439                     format_field(f, 'vbr', '\t%dk'),
3440                     format_field(f, 'acodec', default='unknown').replace(
3441                         'none',
3442                         '' if f.get('vcodec') == 'none'
3443                         else self._format_screen('video only', self.Styles.SUPPRESS)),
3444                     format_field(f, 'abr', '\t%dk'),
3445                     format_field(f, 'asr', '\t%dHz'),
3446                     join_nonempty(
3447                         self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3448                         format_field(f, 'language', '[%s]'),
3449                         join_nonempty(
3450                             format_field(f, 'format_note'),
3451                             format_field(f, 'container', ignore=(None, f.get('ext'))),
3452                             delim=', '),
3453                         delim=' '),
3454                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3455             header_line = self._list_format_headers(
3456                 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3457                 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3458         else:
3459             table = [
3460                 [
3461                     format_field(f, 'format_id'),
3462                     format_field(f, 'ext'),
3463                     self.format_resolution(f),
3464                     self._format_note(f)]
3465                 for f in formats
3466                 if f.get('preference') is None or f['preference'] >= -1000]
3467             header_line = ['format code', 'extension', 'resolution', 'note']
3468
3469         self.to_stdout(render_table(
3470             header_line, table,
3471             extra_gap=(0 if new_format else 1),
3472             hide_empty=new_format,
3473             delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3474
3475     def list_thumbnails(self, info_dict):
3476         thumbnails = list(info_dict.get('thumbnails'))
3477         if not thumbnails:
3478             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3479             return
3480
3481         self.to_screen(
3482             '[info] Thumbnails for %s:' % info_dict['id'])
3483         self.to_stdout(render_table(
3484             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3485             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3486
3487     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3488         if not subtitles:
3489             self.to_screen('%s has no %s' % (video_id, name))
3490             return
3491         self.to_screen(
3492             'Available %s for %s:' % (name, video_id))
3493
3494         def _row(lang, formats):
3495             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3496             if len(set(names)) == 1:
3497                 names = [] if names[0] == 'unknown' else names[:1]
3498             return [lang, ', '.join(names), ', '.join(exts)]
3499
3500         self.to_stdout(render_table(
3501             self._list_format_headers('Language', 'Name', 'Formats'),
3502             [_row(lang, formats) for lang, formats in subtitles.items()],
3503             hide_empty=True))
3504
3505     def urlopen(self, req):
3506         """ Start an HTTP download """
3507         if isinstance(req, compat_basestring):
3508             req = sanitized_Request(req)
3509         return self._opener.open(req, timeout=self._socket_timeout)
3510
3511     def print_debug_header(self):
3512         if not self.params.get('verbose'):
3513             return
3514
3515         def get_encoding(stream):
3516             ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3517             if not supports_terminal_sequences(stream):
3518                 from .compat import WINDOWS_VT_MODE
3519                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3520             return ret
3521
3522         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3523             locale.getpreferredencoding(),
3524             sys.getfilesystemencoding(),
3525             get_encoding(self._screen_file), get_encoding(self._err_file),
3526             self.get_encoding())
3527
3528         logger = self.params.get('logger')
3529         if logger:
3530             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3531             write_debug(encoding_str)
3532         else:
3533             write_string(f'[debug] {encoding_str}\n', encoding=None)
3534             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3535
3536         source = detect_variant()
3537         write_debug(join_nonempty(
3538             'yt-dlp version', __version__,
3539             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3540             '' if source == 'unknown' else f'({source})',
3541             delim=' '))
3542         if not _LAZY_LOADER:
3543             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3544                 write_debug('Lazy loading extractors is forcibly disabled')
3545             else:
3546                 write_debug('Lazy loading extractors is disabled')
3547         if plugin_extractors or plugin_postprocessors:
3548             write_debug('Plugins: %s' % [
3549                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3550                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3551         if self.params.get('compat_opts'):
3552             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3553
3554         if source == 'source':
3555             try:
3556                 sp = Popen(
3557                     ['git', 'rev-parse', '--short', 'HEAD'],
3558                     stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3559                     cwd=os.path.dirname(os.path.abspath(__file__)))
3560                 out, err = sp.communicate_or_kill()
3561                 out = out.decode().strip()
3562                 if re.match('[0-9a-f]+', out):
3563                     write_debug('Git HEAD: %s' % out)
3564             except Exception:
3565                 try:
3566                     sys.exc_clear()
3567                 except Exception:
3568                     pass
3569
3570         def python_implementation():
3571             impl_name = platform.python_implementation()
3572             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3573                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3574             return impl_name
3575
3576         write_debug('Python version %s (%s %s) - %s' % (
3577             platform.python_version(),
3578             python_implementation(),
3579             platform.architecture()[0],
3580             platform_name()))
3581
3582         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3583         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3584         if ffmpeg_features:
3585             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3586
3587         exe_versions['rtmpdump'] = rtmpdump_version()
3588         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3589         exe_str = ', '.join(
3590             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3591         ) or 'none'
3592         write_debug('exe versions: %s' % exe_str)
3593
3594         from .downloader.websocket import has_websockets
3595         from .postprocessor.embedthumbnail import has_mutagen
3596         from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
3597
3598         lib_str = join_nonempty(
3599             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3600             SECRETSTORAGE_AVAILABLE and 'secretstorage',
3601             has_mutagen and 'mutagen',
3602             SQLITE_AVAILABLE and 'sqlite',
3603             has_websockets and 'websockets',
3604             delim=', ') or 'none'
3605         write_debug('Optional libraries: %s' % lib_str)
3606
3607         proxy_map = {}
3608         for handler in self._opener.handlers:
3609             if hasattr(handler, 'proxies'):
3610                 proxy_map.update(handler.proxies)
3611         write_debug(f'Proxy map: {proxy_map}')
3612
3613         # Not implemented
3614         if False and self.params.get('call_home'):
3615             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3616             write_debug('Public IP address: %s' % ipaddr)
3617             latest_version = self.urlopen(
3618                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3619             if version_tuple(latest_version) > version_tuple(__version__):
3620                 self.report_warning(
3621                     'You are using an outdated version (newest version: %s)! '
3622                     'See https://yt-dl.org/update if you need help updating.' %
3623                     latest_version)
3624
3625     def _setup_opener(self):
3626         timeout_val = self.params.get('socket_timeout')
3627         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3628
3629         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3630         opts_cookiefile = self.params.get('cookiefile')
3631         opts_proxy = self.params.get('proxy')
3632
3633         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3634
3635         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3636         if opts_proxy is not None:
3637             if opts_proxy == '':
3638                 proxies = {}
3639             else:
3640                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3641         else:
3642             proxies = compat_urllib_request.getproxies()
3643             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3644             if 'http' in proxies and 'https' not in proxies:
3645                 proxies['https'] = proxies['http']
3646         proxy_handler = PerRequestProxyHandler(proxies)
3647
3648         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3649         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3650         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3651         redirect_handler = YoutubeDLRedirectHandler()
3652         data_handler = compat_urllib_request_DataHandler()
3653
3654         # When passing our own FileHandler instance, build_opener won't add the
3655         # default FileHandler and allows us to disable the file protocol, which
3656         # can be used for malicious purposes (see
3657         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3658         file_handler = compat_urllib_request.FileHandler()
3659
3660         def file_open(*args, **kwargs):
3661             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3662         file_handler.file_open = file_open
3663
3664         opener = compat_urllib_request.build_opener(
3665             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3666
3667         # Delete the default user-agent header, which would otherwise apply in
3668         # cases where our custom HTTP handler doesn't come into play
3669         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3670         opener.addheaders = []
3671         self._opener = opener
3672
3673     def encode(self, s):
3674         if isinstance(s, bytes):
3675             return s  # Already encoded
3676
3677         try:
3678             return s.encode(self.get_encoding())
3679         except UnicodeEncodeError as err:
3680             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3681             raise
3682
3683     def get_encoding(self):
3684         encoding = self.params.get('encoding')
3685         if encoding is None:
3686             encoding = preferredencoding()
3687         return encoding
3688
3689     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3690         ''' Write infojson and returns True = written, False = skip, None = error '''
3691         if overwrite is None:
3692             overwrite = self.params.get('overwrites', True)
3693         if not self.params.get('writeinfojson'):
3694             return False
3695         elif not infofn:
3696             self.write_debug(f'Skipping writing {label} infojson')
3697             return False
3698         elif not self._ensure_dir_exists(infofn):
3699             return None
3700         elif not overwrite and os.path.exists(infofn):
3701             self.to_screen(f'[info] {label.title()} metadata is already present')
3702         else:
3703             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3704             try:
3705                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3706             except (OSError, IOError):
3707                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3708                 return None
3709         return True
3710
3711     def _write_description(self, label, ie_result, descfn):
3712         ''' Write description and returns True = written, False = skip, None = error '''
3713         if not self.params.get('writedescription'):
3714             return False
3715         elif not descfn:
3716             self.write_debug(f'Skipping writing {label} description')
3717             return False
3718         elif not self._ensure_dir_exists(descfn):
3719             return None
3720         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3721             self.to_screen(f'[info] {label.title()} description is already present')
3722         elif ie_result.get('description') is None:
3723             self.report_warning(f'There\'s no {label} description to write')
3724             return False
3725         else:
3726             try:
3727                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3728                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3729                     descfile.write(ie_result['description'])
3730             except (OSError, IOError):
3731                 self.report_error(f'Cannot write {label} description file {descfn}')
3732                 return None
3733         return True
3734
3735     def _write_subtitles(self, info_dict, filename):
3736         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3737         ret = []
3738         subtitles = info_dict.get('requested_subtitles')
3739         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3740             # subtitles download errors are already managed as troubles in relevant IE
3741             # that way it will silently go on when used with unsupporting IE
3742             return ret
3743
3744         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3745         if not sub_filename_base:
3746             self.to_screen('[info] Skipping writing video subtitles')
3747             return ret
3748         for sub_lang, sub_info in subtitles.items():
3749             sub_format = sub_info['ext']
3750             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3751             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3752             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3753                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3754                 sub_info['filepath'] = sub_filename
3755                 ret.append((sub_filename, sub_filename_final))
3756                 continue
3757
3758             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3759             if sub_info.get('data') is not None:
3760                 try:
3761                     # Use newline='' to prevent conversion of newline characters
3762                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3763                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3764                         subfile.write(sub_info['data'])
3765                     sub_info['filepath'] = sub_filename
3766                     ret.append((sub_filename, sub_filename_final))
3767                     continue
3768                 except (OSError, IOError):
3769                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3770                     return None
3771
3772             try:
3773                 sub_copy = sub_info.copy()
3774                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3775                 self.dl(sub_filename, sub_copy, subtitle=True)
3776                 sub_info['filepath'] = sub_filename
3777                 ret.append((sub_filename, sub_filename_final))
3778             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3779                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3780                     raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
3781                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3782         return ret
3783
3784     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3785         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3786         write_all = self.params.get('write_all_thumbnails', False)
3787         thumbnails, ret = [], []
3788         if write_all or self.params.get('writethumbnail', False):
3789             thumbnails = info_dict.get('thumbnails') or []
3790         multiple = write_all and len(thumbnails) > 1
3791
3792         if thumb_filename_base is None:
3793             thumb_filename_base = filename
3794         if thumbnails and not thumb_filename_base:
3795             self.write_debug(f'Skipping writing {label} thumbnail')
3796             return ret
3797
3798         for idx, t in list(enumerate(thumbnails))[::-1]:
3799             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3800             thumb_display_id = f'{label} thumbnail {t["id"]}'
3801             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3802             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3803
3804             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3805                 ret.append((thumb_filename, thumb_filename_final))
3806                 t['filepath'] = thumb_filename
3807                 self.to_screen('[info] %s is already present' % (
3808                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3809             else:
3810                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3811                 try:
3812                     uf = self.urlopen(t['url'])
3813                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3814                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3815                         shutil.copyfileobj(uf, thumbf)
3816                     ret.append((thumb_filename, thumb_filename_final))
3817                     t['filepath'] = thumb_filename
3818                 except network_exceptions as err:
3819                     thumbnails.pop(idx)
3820                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3821             if ret and not write_all:
3822                 break
3823         return ret