yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import functools
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import sys
  23 import tempfile
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28 import unicodedata
  29
  30 from enum import Enum
  31 from string import ascii_letters
  32
  33 from .compat import (
  34     compat_basestring,
  35     compat_get_terminal_size,
  36     compat_kwargs,
  37     compat_numeric_types,
  38     compat_os_name,
  39     compat_pycrypto_AES,
  40     compat_shlex_quote,
  41     compat_str,
  42     compat_tokenize_tokenize,
  43     compat_urllib_error,
  44     compat_urllib_request,
  45     compat_urllib_request_DataHandler,
  46     windows_enable_vt_mode,
  47 )
  48 from .cookies import load_cookies
  49 from .utils import (
  50     age_restricted,
  51     args_to_str,
  52     ContentTooShortError,
  53     date_from_str,
  54     DateRange,
  55     DEFAULT_OUTTMPL,
  56     determine_ext,
  57     determine_protocol,
  58     DownloadCancelled,
  59     DownloadError,
  60     encode_compat_str,
  61     encodeFilename,
  62     EntryNotInPlaylist,
  63     error_to_compat_str,
  64     ExistingVideoReached,
  65     expand_path,
  66     ExtractorError,
  67     float_or_none,
  68     format_bytes,
  69     format_field,
  70     format_decimal_suffix,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     get_domain,
  74     HEADRequest,
  75     InAdvancePagedList,
  76     int_or_none,
  77     iri_to_uri,
  78     ISO3166Utils,
  79     join_nonempty,
  80     LazyList,
  81     LINK_TEMPLATES,
  82     locked_file,
  83     make_dir,
  84     make_HTTPS_handler,
  85     MaxDownloadsReached,
  86     network_exceptions,
  87     number_of_digits,
  88     orderedSet,
  89     OUTTMPL_TYPES,
  90     PagedList,
  91     parse_filesize,
  92     PerRequestProxyHandler,
  93     platform_name,
  94     Popen,
  95     POSTPROCESS_WHEN,
  96     PostProcessingError,
  97     preferredencoding,
  98     prepend_extension,
  99     ReExtractInfo,
 100     register_socks_protocols,
 101     RejectedVideoReached,
 102     remove_terminal_sequences,
 103     render_table,
 104     replace_extension,
 105     SameFileError,
 106     sanitize_filename,
 107     sanitize_path,
 108     sanitize_url,
 109     sanitized_Request,
 110     std_headers,
 111     STR_FORMAT_RE_TMPL,
 112     STR_FORMAT_TYPES,
 113     str_or_none,
 114     strftime_or_none,
 115     subtitles_filename,
 116     supports_terminal_sequences,
 117     timetuple_from_msec,
 118     to_high_limit_path,
 119     traverse_obj,
 120     try_get,
 121     UnavailableVideoError,
 122     url_basename,
 123     variadic,
 124     version_tuple,
 125     write_json_file,
 126     write_string,
 127     YoutubeDLCookieProcessor,
 128     YoutubeDLHandler,
 129     YoutubeDLRedirectHandler,
 130 )
 131 from .cache import Cache
 132 from .minicurses import format_text
 133 from .extractor import (
 134     gen_extractor_classes,
 135     get_info_extractor,
 136     _LAZY_LOADER,
 137     _PLUGIN_CLASSES as plugin_extractors
 138 )
 139 from .extractor.openload import PhantomJSwrapper
 140 from .downloader import (
 141     FFmpegFD,
 142     get_suitable_downloader,
 143     shorten_protocol_name
 144 )
 145 from .downloader.rtmp import rtmpdump_version
 146 from .postprocessor import (
 147     get_postprocessor,
 148     EmbedThumbnailPP,
 149     FFmpegFixupDuplicateMoovPP,
 150     FFmpegFixupDurationPP,
 151     FFmpegFixupM3u8PP,
 152     FFmpegFixupM4aPP,
 153     FFmpegFixupStretchedPP,
 154     FFmpegFixupTimestampPP,
 155     FFmpegMergerPP,
 156     FFmpegPostProcessor,
 157     MoveFilesAfterDownloadPP,
 158     _PLUGIN_CLASSES as plugin_postprocessors
 159 )
 160 from .update import detect_variant
 161 from .version import __version__, RELEASE_GIT_HEAD
 162
 163 if compat_os_name == 'nt':
 164     import ctypes
 165
 166
 167 class YoutubeDL(object):
 168     """YoutubeDL class.
 169
 170     YoutubeDL objects are the ones responsible for downloading the
 171     actual video file and writing it to disk if the user has requested
 172     it, among some other tasks. In most cases there should be one per
 173     program. As, given a video URL, the downloader doesn't know how to
 174     extract all the needed information, task that InfoExtractors do, it
 175     has to pass the URL to one of them.
 176
 177     For this, YoutubeDL objects have a method that allows
 178     InfoExtractors to be registered in a given order. When it is passed
 179     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 180     finds that reports being able to handle it. The InfoExtractor extracts
 181     all the information about the video or videos the URL refers to, and
 182     YoutubeDL processes the extracted information, possibly using a File
 183     Downloader to download the video.
 184
 185     YoutubeDL objects accept a lot of parameters. In order not to saturate
 186     the object constructor with arguments, it receives a dictionary of
 187     options instead. These options are available through the params
 188     attribute for the InfoExtractors to use. The YoutubeDL also
 189     registers itself as the downloader in charge for the InfoExtractors
 190     that are added to it, so this is a "mutual registration".
 191
 192     Available options:
 193
 194     username:          Username for authentication purposes.
 195     password:          Password for authentication purposes.
 196     videopassword:     Password for accessing a video.
 197     ap_mso:            Adobe Pass multiple-system operator identifier.
 198     ap_username:       Multiple-system operator account username.
 199     ap_password:       Multiple-system operator account password.
 200     usenetrc:          Use netrc for authentication instead.
 201     verbose:           Print additional info to stdout.
 202     quiet:             Do not print messages to stdout.
 203     no_warnings:       Do not print out anything for warnings.
 204     forceprint:        A dict with keys WHEN mapped to a list of templates to
 205                        print to stdout. The allowed keys are video or any of the
 206                        items in utils.POSTPROCESS_WHEN.
 207                        For compatibility, a single list is also accepted
 208     print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
 209                        a list of tuples with (template, filename)
 210     forceurl:          Force printing final URL. (Deprecated)
 211     forcetitle:        Force printing title. (Deprecated)
 212     forceid:           Force printing ID. (Deprecated)
 213     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 214     forcedescription:  Force printing description. (Deprecated)
 215     forcefilename:     Force printing final filename. (Deprecated)
 216     forceduration:     Force printing duration. (Deprecated)
 217     forcejson:         Force printing info_dict as JSON.
 218     dump_single_json:  Force printing the info_dict of the whole playlist
 219                        (or video) as a single JSON line.
 220     force_write_download_archive: Force writing download archive regardless
 221                        of 'skip_download' or 'simulate'.
 222     simulate:          Do not download the video files. If unset (or None),
 223                        simulate only if listsubtitles, listformats or list_thumbnails is used
 224     format:            Video format code. See "FORMAT SELECTION" for more details.
 225                        You can also pass a function. The function takes 'ctx' as
 226                        argument and returns the formats to download.
 227                        See "build_format_selector" for an implementation
 228     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 229     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 230                        extracting metadata even if the video is not actually
 231                        available for download (experimental)
 232     format_sort:       A list of fields by which to sort the video formats.
 233                        See "Sorting Formats" for more details.
 234     format_sort_force: Force the given format_sort. See "Sorting Formats"
 235                        for more details.
 236     allow_multiple_video_streams:   Allow multiple video streams to be merged
 237                        into a single file
 238     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 239                        into a single file
 240     check_formats:     Whether to test if the formats are downloadable.
 241                        Can be True (check all), False (check none),
 242                        'selected' (check selected formats),
 243                        or None (check only if requested by extractor)
 244     paths:             Dictionary of output paths. The allowed keys are 'home',
 245                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 246     outtmpl:           Dictionary of templates for output names. Allowed keys
 247                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 248                        For compatibility with youtube-dl, a single string can also be used
 249     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 250     restrictfilenames: Do not allow "&" and spaces in file names
 251     trim_file_name:    Limit length of filename (extension excluded)
 252     windowsfilenames:  Force the filenames to be windows compatible
 253     ignoreerrors:      Do not stop on download/postprocessing errors.
 254                        Can be 'only_download' to ignore only download errors.
 255                        Default is 'only_download' for CLI, but False for API
 256     skip_playlist_after_errors: Number of allowed failures until the rest of
 257                        the playlist is skipped
 258     force_generic_extractor: Force downloader to use the generic extractor
 259     overwrites:        Overwrite all video and metadata files if True,
 260                        overwrite only non-video files if None
 261                        and don't overwrite any file if False
 262                        For compatibility with youtube-dl,
 263                        "nooverwrites" may also be used instead
 264     playliststart:     Playlist item to start at.
 265     playlistend:       Playlist item to end at.
 266     playlist_items:    Specific indices of playlist to download.
 267     playlistreverse:   Download playlist items in reverse order.
 268     playlistrandom:    Download playlist items in random order.
 269     matchtitle:        Download only matching titles.
 270     rejecttitle:       Reject downloads for matching titles.
 271     logger:            Log messages to a logging.Logger instance.
 272     logtostderr:       Log messages to stderr instead of stdout.
 273     consoletitle:      Display progress in console window's titlebar.
 274     writedescription:  Write the video description to a .description file
 275     writeinfojson:     Write the video metadata to a .info.json file
 276     clean_infojson:    Remove private fields from the infojson
 277     getcomments:       Extract video comments. This will not be written to disk
 278                        unless writeinfojson is also given
 279     writeannotations:  Write the video annotations to a .annotations.xml file
 280     writethumbnail:    Write the thumbnail image to a file
 281     allow_playlist_files: Whether to write playlists' description, infojson etc
 282                        also to disk when using the 'write*' options
 283     write_all_thumbnails:  Write all thumbnail formats to files
 284     writelink:         Write an internet shortcut file, depending on the
 285                        current platform (.url/.webloc/.desktop)
 286     writeurllink:      Write a Windows internet shortcut file (.url)
 287     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 288     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 289     writesubtitles:    Write the video subtitles to a file
 290     writeautomaticsub: Write the automatically generated subtitles to a file
 291     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 292                        Downloads all the subtitles of the video
 293                        (requires writesubtitles or writeautomaticsub)
 294     listsubtitles:     Lists all available subtitles for the video
 295     subtitlesformat:   The format code for subtitles
 296     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 297                        The list may contain "all" to refer to all the available
 298                        subtitles. The language can be prefixed with a "-" to
 299                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 300     keepvideo:         Keep the video file after post-processing
 301     daterange:         A DateRange object, download only if the upload_date is in the range.
 302     skip_download:     Skip the actual download of the video file
 303     cachedir:          Location of the cache files in the filesystem.
 304                        False to disable filesystem cache.
 305     noplaylist:        Download single video instead of a playlist if in doubt.
 306     age_limit:         An integer representing the user's age in years.
 307                        Unsuitable videos for the given age are skipped.
 308     min_views:         An integer representing the minimum view count the video
 309                        must have in order to not be skipped.
 310                        Videos without view count information are always
 311                        downloaded. None for no limit.
 312     max_views:         An integer representing the maximum view count.
 313                        Videos that are more popular than that are not
 314                        downloaded.
 315                        Videos without view count information are always
 316                        downloaded. None for no limit.
 317     download_archive:  File name of a file where all downloads are recorded.
 318                        Videos already present in the file are not downloaded
 319                        again.
 320     break_on_existing: Stop the download process after attempting to download a
 321                        file that is in the archive.
 322     break_on_reject:   Stop the download process when encountering a video that
 323                        has been filtered out.
 324     break_per_url:     Whether break_on_reject and break_on_existing
 325                        should act on each input URL as opposed to for the entire queue
 326     cookiefile:        File name where cookies should be read from and dumped to
 327     cookiesfrombrowser:  A tuple containing the name of the browser, the profile
 328                        name/path from where cookies are loaded, and the name of the
 329                        keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
 330     legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
 331                        support RFC 5746 secure renegotiation
 332     nocheckcertificate:  Do not verify SSL certificates
 333     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 334                        At the moment, this is only supported by YouTube.
 335     proxy:             URL of the proxy server to use
 336     geo_verification_proxy:  URL of the proxy to use for IP address verification
 337                        on geo-restricted sites.
 338     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 339     bidi_workaround:   Work around buggy terminals without bidirectional text
 340                        support, using fribidi
 341     debug_printtraffic:Print out sent and received HTTP traffic
 342     include_ads:       Download ads as well (deprecated)
 343     default_search:    Prepend this string if an input url is not valid.
 344                        'auto' for elaborate guessing
 345     encoding:          Use this encoding instead of the system-specified.
 346     extract_flat:      Do not resolve URLs, return the immediate result.
 347                        Pass in 'in_playlist' to only show this behavior for
 348                        playlist items.
 349     wait_for_video:    If given, wait for scheduled streams to become available.
 350                        The value should be a tuple containing the range
 351                        (min_secs, max_secs) to wait between retries
 352     postprocessors:    A list of dictionaries, each with an entry
 353                        * key:  The name of the postprocessor. See
 354                                yt_dlp/postprocessor/__init__.py for a list.
 355                        * when: When to run the postprocessor. Allowed values are
 356                                the entries of utils.POSTPROCESS_WHEN
 357                                Assumed to be 'post_process' if not given
 358     post_hooks:        Deprecated - Register a custom postprocessor instead
 359                        A list of functions that get called as the final step
 360                        for each video file, after all postprocessors have been
 361                        called. The filename will be passed as the only argument.
 362     progress_hooks:    A list of functions that get called on download
 363                        progress, with a dictionary with the entries
 364                        * status: One of "downloading", "error", or "finished".
 365                                  Check this first and ignore unknown values.
 366                        * info_dict: The extracted info_dict
 367
 368                        If status is one of "downloading", or "finished", the
 369                        following properties may also be present:
 370                        * filename: The final filename (always present)
 371                        * tmpfilename: The filename we're currently writing to
 372                        * downloaded_bytes: Bytes on disk
 373                        * total_bytes: Size of the whole file, None if unknown
 374                        * total_bytes_estimate: Guess of the eventual file size,
 375                                                None if unavailable.
 376                        * elapsed: The number of seconds since download started.
 377                        * eta: The estimated time in seconds, None if unknown
 378                        * speed: The download speed in bytes/second, None if
 379                                 unknown
 380                        * fragment_index: The counter of the currently
 381                                          downloaded video fragment.
 382                        * fragment_count: The number of fragments (= individual
 383                                          files that will be merged)
 384
 385                        Progress hooks are guaranteed to be called at least once
 386                        (with status "finished") if the download is successful.
 387     postprocessor_hooks:  A list of functions that get called on postprocessing
 388                        progress, with a dictionary with the entries
 389                        * status: One of "started", "processing", or "finished".
 390                                  Check this first and ignore unknown values.
 391                        * postprocessor: Name of the postprocessor
 392                        * info_dict: The extracted info_dict
 393
 394                        Progress hooks are guaranteed to be called at least twice
 395                        (with status "started" and "finished") if the processing is successful.
 396     merge_output_format: Extension to use when merging formats.
 397     final_ext:         Expected final extension; used to detect when the file was
 398                        already downloaded and converted
 399     fixup:             Automatically correct known faults of the file.
 400                        One of:
 401                        - "never": do nothing
 402                        - "warn": only emit a warning
 403                        - "detect_or_warn": check whether we can do anything
 404                                            about it, warn otherwise (default)
 405     source_address:    Client-side IP address to bind to.
 406     call_home:         Boolean, true iff we are allowed to contact the
 407                        yt-dlp servers for debugging. (BROKEN)
 408     sleep_interval_requests: Number of seconds to sleep between requests
 409                        during extraction
 410     sleep_interval:    Number of seconds to sleep before each download when
 411                        used alone or a lower bound of a range for randomized
 412                        sleep before each download (minimum possible number
 413                        of seconds to sleep) when used along with
 414                        max_sleep_interval.
 415     max_sleep_interval:Upper bound of a range for randomized sleep before each
 416                        download (maximum possible number of seconds to sleep).
 417                        Must only be used along with sleep_interval.
 418                        Actual sleep time will be a random float from range
 419                        [sleep_interval; max_sleep_interval].
 420     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 421     listformats:       Print an overview of available video formats and exit.
 422     list_thumbnails:   Print a table of all thumbnails and exit.
 423     match_filter:      A function that gets called with the info_dict of
 424                        every video.
 425                        If it returns a message, the video is ignored.
 426                        If it returns None, the video is downloaded.
 427                        match_filter_func in utils.py is one example for this.
 428     no_color:          Do not emit color codes in output.
 429     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 430                        HTTP header
 431     geo_bypass_country:
 432                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 433                        explicit geographic restriction bypassing via faking
 434                        X-Forwarded-For HTTP header
 435     geo_bypass_ip_block:
 436                        IP range in CIDR notation that will be used similarly to
 437                        geo_bypass_country
 438
 439     The following options determine which downloader is picked:
 440     external_downloader: A dictionary of protocol keys and the executable of the
 441                        external downloader to use for it. The allowed protocols
 442                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 443                        Set the value to 'native' to use the native downloader
 444     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 445                        or {'m3u8': 'ffmpeg'} instead.
 446                        Use the native HLS downloader instead of ffmpeg/avconv
 447                        if True, otherwise use ffmpeg/avconv if False, otherwise
 448                        use downloader suggested by extractor if None.
 449     compat_opts:       Compatibility options. See "Differences in default behavior".
 450                        The following options do not work when used through the API:
 451                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 452                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 453                        Refer __init__.py for their implementation
 454     progress_template: Dictionary of templates for progress outputs.
 455                        Allowed keys are 'download', 'postprocess',
 456                        'download-title' (console title) and 'postprocess-title'.
 457                        The template is mapped on a dictionary with keys 'progress' and 'info'
 458
 459     The following parameters are not used by YoutubeDL itself, they are used by
 460     the downloader (see yt_dlp/downloader/common.py):
 461     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 462     max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
 463     continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 464     external_downloader_args, concurrent_fragment_downloads.
 465
 466     The following options are used by the post processors:
 467     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 468                        otherwise prefer ffmpeg. (avconv support is deprecated)
 469     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 470                        to the binary or its containing directory.
 471     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 472                        and a list of additional command-line arguments for the
 473                        postprocessor/executable. The dict can also have "PP+EXE" keys
 474                        which are used when the given exe is used by the given PP.
 475                        Use 'default' as the name for arguments to be passed to all PP
 476                        For compatibility with youtube-dl, a single list of args
 477                        can also be used
 478
 479     The following options are used by the extractors:
 480     extractor_retries: Number of times to retry for known errors
 481     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 482     hls_split_discontinuity: Split HLS playlists to different formats at
 483                        discontinuities such as ad breaks (default: False)
 484     extractor_args:    A dictionary of arguments to be passed to the extractors.
 485                        See "EXTRACTOR ARGUMENTS" for details.
 486                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 487     mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
 488     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 489                        If True (default), DASH manifests and related
 490                        data will be downloaded and processed by extractor.
 491                        You can reduce network I/O by disabling it if you don't
 492                        care about DASH. (only for youtube)
 493     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 494                        If True (default), HLS manifests and related
 495                        data will be downloaded and processed by extractor.
 496                        You can reduce network I/O by disabling it if you don't
 497                        care about HLS. (only for youtube)
 498     """
 499
 500     _NUMERIC_FIELDS = set((
 501         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 502         'timestamp', 'release_timestamp',
 503         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 504         'average_rating', 'comment_count', 'age_limit',
 505         'start_time', 'end_time',
 506         'chapter_number', 'season_number', 'episode_number',
 507         'track_number', 'disc_number', 'release_year',
 508     ))
 509
 510     _format_selection_exts = {
 511         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 512         'video': {'mp4', 'flv', 'webm', '3gp'},
 513         'storyboards': {'mhtml'},
 514     }
 515
 516     params = None
 517     _ies = {}
 518     _pps = {k: [] for k in POSTPROCESS_WHEN}
 519     _printed_messages = set()
 520     _first_webpage_request = True
 521     _download_retcode = None
 522     _num_downloads = None
 523     _playlist_level = 0
 524     _playlist_urls = set()
 525     _screen_file = None
 526
 527     def __init__(self, params=None, auto_init=True):
 528         """Create a YoutubeDL object with the given options.
 529         @param auto_init    Whether to load the default extractors and print header (if verbose).
 530                             Set to 'no_verbose_header' to not print the header
 531         """
 532         if params is None:
 533             params = {}
 534         self._ies = {}
 535         self._ies_instances = {}
 536         self._pps = {k: [] for k in POSTPROCESS_WHEN}
 537         self._printed_messages = set()
 538         self._first_webpage_request = True
 539         self._post_hooks = []
 540         self._progress_hooks = []
 541         self._postprocessor_hooks = []
 542         self._download_retcode = 0
 543         self._num_downloads = 0
 544         self._num_videos = 0
 545         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 546         self._err_file = sys.stderr
 547         self.params = params
 548         self.cache = Cache(self)
 549
 550         windows_enable_vt_mode()
 551         self._allow_colors = {
 552             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 553             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 554         }
 555
 556         if sys.version_info < (3, 6):
 557             self.report_warning(
 558                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 559
 560         if self.params.get('allow_unplayable_formats'):
 561             self.report_warning(
 562                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 563                 'This is a developer option intended for debugging. \n'
 564                 '         If you experience any issues while using this option, '
 565                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 566
 567         def check_deprecated(param, option, suggestion):
 568             if self.params.get(param) is not None:
 569                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 570                 return True
 571             return False
 572
 573         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 574             if self.params.get('geo_verification_proxy') is None:
 575                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 576
 577         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 578         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 579         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 580
 581         for msg in self.params.get('_warnings', []):
 582             self.report_warning(msg)
 583         for msg in self.params.get('_deprecation_warnings', []):
 584             self.deprecation_warning(msg)
 585
 586         if 'list-formats' in self.params.get('compat_opts', []):
 587             self.params['listformats_table'] = False
 588
 589         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 590             # nooverwrites was unnecessarily changed to overwrites
 591             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 592             # This ensures compatibility with both keys
 593             self.params['overwrites'] = not self.params['nooverwrites']
 594         elif self.params.get('overwrites') is None:
 595             self.params.pop('overwrites', None)
 596         else:
 597             self.params['nooverwrites'] = not self.params['overwrites']
 598
 599         params.setdefault('forceprint', {})
 600         params.setdefault('print_to_file', {})
 601
 602         # Compatibility with older syntax
 603         if not isinstance(params['forceprint'], dict):
 604             params['forceprint'] = {'video': params['forceprint']}
 605
 606         if params.get('bidi_workaround', False):
 607             try:
 608                 import pty
 609                 master, slave = pty.openpty()
 610                 width = compat_get_terminal_size().columns
 611                 if width is None:
 612                     width_args = []
 613                 else:
 614                     width_args = ['-w', str(width)]
 615                 sp_kwargs = dict(
 616                     stdin=subprocess.PIPE,
 617                     stdout=slave,
 618                     stderr=self._err_file)
 619                 try:
 620                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 621                 except OSError:
 622                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 623                 self._output_channel = os.fdopen(master, 'rb')
 624             except OSError as ose:
 625                 if ose.errno == errno.ENOENT:
 626                     self.report_warning(
 627                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 628                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 629                 else:
 630                     raise
 631
 632         if (sys.platform != 'win32'
 633                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 634                 and not params.get('restrictfilenames', False)):
 635             # Unicode filesystem API will throw errors (#1474, #13027)
 636             self.report_warning(
 637                 'Assuming --restrict-filenames since file system encoding '
 638                 'cannot encode all characters. '
 639                 'Set the LC_ALL environment variable to fix this.')
 640             self.params['restrictfilenames'] = True
 641
 642         self.outtmpl_dict = self.parse_outtmpl()
 643
 644         # Creating format selector here allows us to catch syntax errors before the extraction
 645         self.format_selector = (
 646             self.params.get('format') if self.params.get('format') in (None, '-')
 647             else self.params['format'] if callable(self.params['format'])
 648             else self.build_format_selector(self.params['format']))
 649
 650         self._setup_opener()
 651
 652         if auto_init:
 653             if auto_init != 'no_verbose_header':
 654                 self.print_debug_header()
 655             self.add_default_info_extractors()
 656
 657         hooks = {
 658             'post_hooks': self.add_post_hook,
 659             'progress_hooks': self.add_progress_hook,
 660             'postprocessor_hooks': self.add_postprocessor_hook,
 661         }
 662         for opt, fn in hooks.items():
 663             for ph in self.params.get(opt, []):
 664                 fn(ph)
 665
 666         for pp_def_raw in self.params.get('postprocessors', []):
 667             pp_def = dict(pp_def_raw)
 668             when = pp_def.pop('when', 'post_process')
 669             self.add_post_processor(
 670                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
 671                 when=when)
 672
 673         register_socks_protocols()
 674
 675         def preload_download_archive(fn):
 676             """Preload the archive, if any is specified"""
 677             if fn is None:
 678                 return False
 679             self.write_debug(f'Loading archive file {fn!r}')
 680             try:
 681                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 682                     for line in archive_file:
 683                         self.archive.add(line.strip())
 684             except IOError as ioe:
 685                 if ioe.errno != errno.ENOENT:
 686                     raise
 687                 return False
 688             return True
 689
 690         self.archive = set()
 691         preload_download_archive(self.params.get('download_archive'))
 692
 693     def warn_if_short_id(self, argv):
 694         # short YouTube ID starting with dash?
 695         idxs = [
 696             i for i, a in enumerate(argv)
 697             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 698         if idxs:
 699             correct_argv = (
 700                 ['yt-dlp']
 701                 + [a for i, a in enumerate(argv) if i not in idxs]
 702                 + ['--'] + [argv[i] for i in idxs]
 703             )
 704             self.report_warning(
 705                 'Long argument string detected. '
 706                 'Use -- to separate parameters and URLs, like this:\n%s' %
 707                 args_to_str(correct_argv))
 708
 709     def add_info_extractor(self, ie):
 710         """Add an InfoExtractor object to the end of the list."""
 711         ie_key = ie.ie_key()
 712         self._ies[ie_key] = ie
 713         if not isinstance(ie, type):
 714             self._ies_instances[ie_key] = ie
 715             ie.set_downloader(self)
 716
 717     def _get_info_extractor_class(self, ie_key):
 718         ie = self._ies.get(ie_key)
 719         if ie is None:
 720             ie = get_info_extractor(ie_key)
 721             self.add_info_extractor(ie)
 722         return ie
 723
 724     def get_info_extractor(self, ie_key):
 725         """
 726         Get an instance of an IE with name ie_key, it will try to get one from
 727         the _ies list, if there's no instance it will create a new one and add
 728         it to the extractor list.
 729         """
 730         ie = self._ies_instances.get(ie_key)
 731         if ie is None:
 732             ie = get_info_extractor(ie_key)()
 733             self.add_info_extractor(ie)
 734         return ie
 735
 736     def add_default_info_extractors(self):
 737         """
 738         Add the InfoExtractors returned by gen_extractors to the end of the list
 739         """
 740         for ie in gen_extractor_classes():
 741             self.add_info_extractor(ie)
 742
 743     def add_post_processor(self, pp, when='post_process'):
 744         """Add a PostProcessor object to the end of the chain."""
 745         self._pps[when].append(pp)
 746         pp.set_downloader(self)
 747
 748     def add_post_hook(self, ph):
 749         """Add the post hook"""
 750         self._post_hooks.append(ph)
 751
 752     def add_progress_hook(self, ph):
 753         """Add the download progress hook"""
 754         self._progress_hooks.append(ph)
 755
 756     def add_postprocessor_hook(self, ph):
 757         """Add the postprocessing progress hook"""
 758         self._postprocessor_hooks.append(ph)
 759         for pps in self._pps.values():
 760             for pp in pps:
 761                 pp.add_progress_hook(ph)
 762
 763     def _bidi_workaround(self, message):
 764         if not hasattr(self, '_output_channel'):
 765             return message
 766
 767         assert hasattr(self, '_output_process')
 768         assert isinstance(message, compat_str)
 769         line_count = message.count('\n') + 1
 770         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 771         self._output_process.stdin.flush()
 772         res = ''.join(self._output_channel.readline().decode('utf-8')
 773                       for _ in range(line_count))
 774         return res[:-len('\n')]
 775
 776     def _write_string(self, message, out=None, only_once=False):
 777         if only_once:
 778             if message in self._printed_messages:
 779                 return
 780             self._printed_messages.add(message)
 781         write_string(message, out=out, encoding=self.params.get('encoding'))
 782
 783     def to_stdout(self, message, skip_eol=False, quiet=False):
 784         """Print message to stdout"""
 785         if self.params.get('logger'):
 786             self.params['logger'].debug(message)
 787         elif not quiet or self.params.get('verbose'):
 788             self._write_string(
 789                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 790                 self._err_file if quiet else self._screen_file)
 791
 792     def to_stderr(self, message, only_once=False):
 793         """Print message to stderr"""
 794         assert isinstance(message, compat_str)
 795         if self.params.get('logger'):
 796             self.params['logger'].error(message)
 797         else:
 798             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 799
 800     def to_console_title(self, message):
 801         if not self.params.get('consoletitle', False):
 802             return
 803         message = remove_terminal_sequences(message)
 804         if compat_os_name == 'nt':
 805             if ctypes.windll.kernel32.GetConsoleWindow():
 806                 # c_wchar_p() might not be necessary if `message` is
 807                 # already of type unicode()
 808                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 809         elif 'TERM' in os.environ:
 810             self._write_string('\033]0;%s\007' % message, self._screen_file)
 811
 812     def save_console_title(self):
 813         if not self.params.get('consoletitle', False):
 814             return
 815         if self.params.get('simulate'):
 816             return
 817         if compat_os_name != 'nt' and 'TERM' in os.environ:
 818             # Save the title on stack
 819             self._write_string('\033[22;0t', self._screen_file)
 820
 821     def restore_console_title(self):
 822         if not self.params.get('consoletitle', False):
 823             return
 824         if self.params.get('simulate'):
 825             return
 826         if compat_os_name != 'nt' and 'TERM' in os.environ:
 827             # Restore the title from stack
 828             self._write_string('\033[23;0t', self._screen_file)
 829
    def __enter__(self):
        """Enter the context: save the console title and return this object."""
        self.save_console_title()
        return self
 833
 834     def __exit__(self, *args):
 835         self.restore_console_title()
 836
 837         if self.params.get('cookiefile') is not None:
 838             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 839
 840     def trouble(self, message=None, tb=None, is_error=True):
 841         """Determine action to take when a download problem appears.
 842
 843         Depending on if the downloader has been configured to ignore
 844         download errors or not, this method may throw an exception or
 845         not when errors are found, after printing the message.
 846
 847         @param tb          If given, is additional traceback information
 848         @param is_error    Whether to raise error according to ignorerrors
 849         """
 850         if message is not None:
 851             self.to_stderr(message)
 852         if self.params.get('verbose'):
 853             if tb is None:
 854                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 855                     tb = ''
 856                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 857                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 858                     tb += encode_compat_str(traceback.format_exc())
 859                 else:
 860                     tb_data = traceback.format_list(traceback.extract_stack())
 861                     tb = ''.join(tb_data)
 862             if tb:
 863                 self.to_stderr(tb)
 864         if not is_error:
 865             return
 866         if not self.params.get('ignoreerrors'):
 867             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 868                 exc_info = sys.exc_info()[1].exc_info
 869             else:
 870                 exc_info = sys.exc_info()
 871             raise DownloadError(message, exc_info)
 872         self._download_retcode = 1
 873
 874     def to_screen(self, message, skip_eol=False):
 875         """Print message to stdout if not in quiet mode"""
 876         self.to_stdout(
 877             message, skip_eol, quiet=self.params.get('quiet', False))
 878
    class Styles(Enum):
        """Named text styles; _format_text() passes a member's value on to format_text()."""
        HEADERS = 'yellow'
        EMPHASIS = 'light blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        # Same value as HEADERS, so Enum makes WARNING an alias of HEADERS
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 887
 888     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 889         if test_encoding:
 890             original_text = text
 891             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 892             text = text.encode(encoding, 'ignore').decode(encoding)
 893             if fallback is not None and text != original_text:
 894                 text = fallback
 895         if isinstance(f, self.Styles):
 896             f = f.value
 897         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 898
 899     def _format_screen(self, *args, **kwargs):
 900         return self._format_text(
 901             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 902
 903     def _format_err(self, *args, **kwargs):
 904         return self._format_text(
 905             self._err_file, self._allow_colors['err'], *args, **kwargs)
 906
 907     def report_warning(self, message, only_once=False):
 908         '''
 909         Print the message to stderr, it will be prefixed with 'WARNING:'
 910         If stderr is a tty file the 'WARNING:' will be colored
 911         '''
 912         if self.params.get('logger') is not None:
 913             self.params['logger'].warning(message)
 914         else:
 915             if self.params.get('no_warnings'):
 916                 return
 917             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 918
 919     def deprecation_warning(self, message):
 920         if self.params.get('logger') is not None:
 921             self.params['logger'].warning('DeprecationWarning: {message}')
 922         else:
 923             self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
 924
 925     def report_error(self, message, *args, **kwargs):
 926         '''
 927         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 928         in red if stderr is a tty file.
 929         '''
 930         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
 931
 932     def write_debug(self, message, only_once=False):
 933         '''Log debug message or Print message to stderr'''
 934         if not self.params.get('verbose', False):
 935             return
 936         message = '[debug] %s' % message
 937         if self.params.get('logger'):
 938             self.params['logger'].debug(message)
 939         else:
 940             self.to_stderr(message, only_once)
 941
 942     def report_file_already_downloaded(self, file_name):
 943         """Report file has already been fully downloaded."""
 944         try:
 945             self.to_screen('[download] %s has already been downloaded' % file_name)
 946         except UnicodeEncodeError:
 947             self.to_screen('[download] The file has already been downloaded')
 948
 949     def report_file_delete(self, file_name):
 950         """Report that existing file will be deleted."""
 951         try:
 952             self.to_screen('Deleting existing file %s' % file_name)
 953         except UnicodeEncodeError:
 954             self.to_screen('Deleting existing file')
 955
 956     def raise_no_formats(self, info, forced=False):
 957         has_drm = info.get('__has_drm')
 958         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 959         expected = self.params.get('ignore_no_formats_error')
 960         if forced or not expected:
 961             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 962                                  expected=has_drm or expected)
 963         else:
 964             self.report_warning(msg)
 965
 966     def parse_outtmpl(self):
 967         outtmpl_dict = self.params.get('outtmpl', {})
 968         if not isinstance(outtmpl_dict, dict):
 969             outtmpl_dict = {'default': outtmpl_dict}
 970         # Remove spaces in the default template
 971         if self.params.get('restrictfilenames'):
 972             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 973         else:
 974             sanitize = lambda x: x
 975         outtmpl_dict.update({
 976             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 977             if outtmpl_dict.get(k) is None})
 978         for key, val in outtmpl_dict.items():
 979             if isinstance(val, bytes):
 980                 self.report_warning(
 981                     'Parameter outtmpl is bytes, but should be a unicode string. '
 982                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 983         return outtmpl_dict
 984
 985     def get_output_path(self, dir_type='', filename=None):
 986         paths = self.params.get('paths', {})
 987         assert isinstance(paths, dict)
 988         path = os.path.join(
 989             expand_path(paths.get('home', '').strip()),
 990             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 991             filename or '')
 992
 993         # Temporary fix for #4787
 994         # 'Treat' all problem characters by passing filename through preferredencoding
 995         # to workaround encoding issues with subprocess on python2 @ Windows
 996         if sys.version_info < (3, 0) and sys.platform == 'win32':
 997             path = encodeFilename(path, True).decode(preferredencoding())
 998         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 999
1000     @staticmethod
1001     def _outtmpl_expandpath(outtmpl):
1002         # expand_path translates '%%' into '%' and '$$' into '$'
1003         # correspondingly that is not what we want since we need to keep
1004         # '%%' intact for template dict substitution step. Working around
1005         # with boundary-alike separator hack.
1006         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1007         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
1008
1009         # outtmpl should be expand_path'ed before template dict substitution
1010         # because meta fields may contain env variables we don't want to
1011         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1012         # title "Hello $PATH", we don't want `$PATH` to be expanded.
1013         return expand_path(outtmpl).replace(sep, '')
1014
1015     @staticmethod
1016     def escape_outtmpl(outtmpl):
1017         ''' Escape any remaining strings like %s, %abc% etc. '''
1018         return re.sub(
1019             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1020             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1021             outtmpl)
1022
1023     @classmethod
1024     def validate_outtmpl(cls, outtmpl):
1025         ''' @return None or Exception object '''
1026         outtmpl = re.sub(
1027             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
1028             lambda mobj: f'{mobj.group(0)[:-1]}s',
1029             cls._outtmpl_expandpath(outtmpl))
1030         try:
1031             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
1032             return None
1033         except ValueError as err:
1034             return err
1035
1036     @staticmethod
1037     def _copy_infodict(info_dict):
1038         info_dict = dict(info_dict)
1039         for key in ('__original_infodict', '__postprocessors'):
1040             info_dict.pop(key, None)
1041         return info_dict
1042
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

        Every keyed field in the template is evaluated here (object traversal,
        maths, date formatting, alternates, replacement, default and the custom
        conversions l, j, q, B, U, D, S) and rewritten to refer to an entry of
        a freshly built dict holding the computed value.

        Note that 'epoch' is set on the info_dict passed in (if missing);
        all other changes are made to a copy.

        @param outtmpl     The output template string
        @param info_dict   The info dict to take the field values from
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        @return            Tuple (rewritten template, dict of computed values)
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        # From here on work on a copy without the internal bookkeeping keys
        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Computed values, keyed by the rewritten template keys (filled in by create_key)
        TMPL_DICT = {}
        # Matches template fields using a standard conversion type or one of the custom ones
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        # Operators supported in field maths; operands are converted to float first
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the "." in the number pattern below is unescaped and so
        # matches any character - confirm whether "\." was intended
        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Grammar of the text inside %(...): optional negation, field path, maths,
        # ">strftime format", ",alternate", "&replacement" and "|default"
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            """Look up the dotted path k in info_dict; a leading empty segment is dropped."""
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            """Compute the value for one parsed field expression (groupdict of INTERNAL_FORMAT_RE)."""
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Consume the maths string piece by piece, alternating operator and operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    # A leading "-" on the operand is applied as a multiplier
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        # Not a number literal: treat the operand as another field
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder used when a field has no value and no explicit default
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            """Default sanitizer: make value safe for use in a filename."""
            return sanitize_filename(str(value), restricted=restricted,
                                     is_id=re.search(r'(^|[_.])id(\.|$)', key))

        # `sanitize` may be a custom sanitizer function; after this it is only a flag
        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            """json.dumps fallback: sets and LazyLists become lists, anything else its repr."""
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            """Substitution callback for EXTERNAL_FORMAT_RE.

            Evaluates the field expression, stores the resulting value in
            TMPL_DICT and returns the field rewritten to point at that entry.
            """
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            # Walk through the alternates until one of them yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            # Backward compatibility: zero-pad the numbering fields (see field_size_compat_map)
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            # Same format spec, but with the conversion type replaced by plain 's'
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                # Apply the format spec on the UTF-8 bytes, then decode ignoring errors
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    # Not a number: fall back to printing the default as a string
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            # Build the dict key from the original key (each '%' followed by NUL)
            # plus NUL and the original format
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1216
1217     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1218         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1219         return self.escape_outtmpl(outtmpl) % info_dict
1220
1221     def _prepare_filename(self, info_dict, tmpl_type='default'):
1222         try:
1223             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1224             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1225             if not filename:
1226                 return None
1227
1228             if tmpl_type in ('default', 'temp'):
1229                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1230                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1231                     filename = replace_extension(filename, ext, final_ext)
1232             else:
1233                 force_ext = OUTTMPL_TYPES[tmpl_type]
1234                 if force_ext:
1235                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1236
1237             # https://github.com/blackjack4494/youtube-dlc/issues/85
1238             trim_file_name = self.params.get('trim_file_name', False)
1239             if trim_file_name:
1240                 no_ext, *ext = filename.rsplit('.', 2)
1241                 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1242
1243             return filename
1244         except ValueError as err:
1245             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1246             return None
1247
1248     def prepare_filename(self, info_dict, dir_type='', warn=False):
1249         """Generate the output filename."""
1250
1251         filename = self._prepare_filename(info_dict, dir_type or 'default')
1252         if not filename and dir_type not in ('', 'temp'):
1253             return ''
1254
1255         if warn:
1256             if not self.params.get('paths'):
1257                 pass
1258             elif filename == '-':
1259                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1260             elif os.path.isabs(filename):
1261                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1262         if filename == '-' or not filename:
1263             return filename
1264
1265         return self.get_output_path(dir_type, filename)
1266
1267     def _match_entry(self, info_dict, incomplete=False, silent=False):
1268         """ Returns None if the file should be downloaded """
1269
1270         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1271
1272         def check_filter():
1273             if 'title' in info_dict:
1274                 # This can happen when we're just evaluating the playlist
1275                 title = info_dict['title']
1276                 matchtitle = self.params.get('matchtitle', False)
1277                 if matchtitle:
1278                     if not re.search(matchtitle, title, re.IGNORECASE):
1279                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1280                 rejecttitle = self.params.get('rejecttitle', False)
1281                 if rejecttitle:
1282                     if re.search(rejecttitle, title, re.IGNORECASE):
1283                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1284             date = info_dict.get('upload_date')
1285             if date is not None:
1286                 dateRange = self.params.get('daterange', DateRange())
1287                 if date not in dateRange:
1288                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1289             view_count = info_dict.get('view_count')
1290             if view_count is not None:
1291                 min_views = self.params.get('min_views')
1292                 if min_views is not None and view_count < min_views:
1293                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1294                 max_views = self.params.get('max_views')
1295                 if max_views is not None and view_count > max_views:
1296                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1297             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1298                 return 'Skipping "%s" because it is age restricted' % video_title
1299
1300             match_filter = self.params.get('match_filter')
1301             if match_filter is not None:
1302                 try:
1303                     ret = match_filter(info_dict, incomplete=incomplete)
1304                 except TypeError:
1305                     # For backward compatibility
1306                     ret = None if incomplete else match_filter(info_dict)
1307                 if ret is not None:
1308                     return ret
1309             return None
1310
1311         if self.in_download_archive(info_dict):
1312             reason = '%s has already been recorded in the archive' % video_title
1313             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1314         else:
1315             reason = check_filter()
1316             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1317         if reason is not None:
1318             if not silent:
1319                 self.to_screen('[download] ' + reason)
1320             if self.params.get(break_opt, False):
1321                 raise break_err()
1322         return reason
1323
1324     @staticmethod
1325     def add_extra_info(info_dict, extra_info):
1326         '''Set the keys from extra_info in info dict if they are missing'''
1327         for key, value in extra_info.items():
1328             info_dict.setdefault(key, value)
1329
1330     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1331                      process=True, force_generic_extractor=False):
1332         """
1333         Return a list with a dictionary for each video extracted.
1334
1335         Arguments:
1336         url -- URL to extract
1337
1338         Keyword arguments:
1339         download -- whether to download videos during extraction
1340         ie_key -- extractor key hint
1341         extra_info -- dictionary containing the extra values to add to each result
1342         process -- whether to resolve all unresolved references (URLs, playlist items),
1343             must be True for download to work.
1344         force_generic_extractor -- force using the generic extractor
1345         """
1346
1347         if extra_info is None:
1348             extra_info = {}
1349
1350         if not ie_key and force_generic_extractor:
1351             ie_key = 'Generic'
1352
1353         if ie_key:
1354             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1355         else:
1356             ies = self._ies
1357
1358         for ie_key, ie in ies.items():
1359             if not ie.suitable(url):
1360                 continue
1361
1362             if not ie.working():
1363                 self.report_warning('The program functionality for this site has been marked as broken, '
1364                                     'and will probably not work.')
1365
1366             temp_id = ie.get_temp_id(url)
1367             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1368                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1369                 if self.params.get('break_on_existing', False):
1370                     raise ExistingVideoReached()
1371                 break
1372             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1373         else:
1374             self.report_error('no suitable InfoExtractor for URL %s' % url)
1375
    def __handle_extraction_exceptions(func):
        """
        Decorator for methods (used inside the class body) that centralises
        the handling of errors raised by the wrapped call `func(self, ...)`.

        The wrapper returns the result of `func`. If an error is handled
        (reported instead of re-raised), the wrapper returns None. A
        ReExtractInfo exception makes the wrapper call `func` again.
        """
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            # Loop so that the call can be repeated when ReExtractInfo is raised
            while True:
                try:
                    return func(self, *args, **kwargs)
                except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                    # Re-raised untouched; must be listed before the broader handlers below
                    raise
                except ReExtractInfo as e:
                    if e.expected:
                        self.to_screen(f'{e}; Re-extracting data')
                    else:
                        self.to_stderr('\r')
                        self.report_warning(f'{e}; Re-extracting data')
                    # Retry the wrapped call
                    continue
                except GeoRestrictedError as e:
                    # Extend the message with the list of countries (if known) and a hint
                    msg = e.msg
                    if e.countries:
                        msg += '\nThis video is available in %s.' % ', '.join(
                            map(ISO3166Utils.short2full, e.countries))
                    msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                    self.report_error(msg)
                except ExtractorError as e:  # An error we somewhat expected
                    self.report_error(str(e), e.format_traceback())
                except Exception as e:
                    # Any other error is only reported when 'ignoreerrors' is set
                    if self.params.get('ignoreerrors'):
                        self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
                    else:
                        raise
                # Reached only after an error was reported: leave the loop and return None
                break
        return wrapper
1407
1408     def _wait_for_video(self, ie_result):
1409         if (not self.params.get('wait_for_video')
1410                 or ie_result.get('_type', 'video') != 'video'
1411                 or ie_result.get('formats') or ie_result.get('url')):
1412             return
1413
1414         format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1415         last_msg = ''
1416
1417         def progress(msg):
1418             nonlocal last_msg
1419             self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1420             last_msg = msg
1421
1422         min_wait, max_wait = self.params.get('wait_for_video')
1423         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1424         if diff is None and ie_result.get('live_status') == 'is_upcoming':
1425             diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1426             self.report_warning('Release time of video is not known')
1427         elif (diff or 0) <= 0:
1428             self.report_warning('Video should already be available according to extracted info')
1429         diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1430         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1431
1432         wait_till = time.time() + diff
1433         try:
1434             while True:
1435                 diff = wait_till - time.time()
1436                 if diff <= 0:
1437                     progress('')
1438                     raise ReExtractInfo('[wait] Wait period ended', expected=True)
1439                 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1440                 time.sleep(1)
1441         except KeyboardInterrupt:
1442             progress('')
1443             raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1444         except BaseException as e:
1445             if not isinstance(e, ReExtractInfo):
1446                 self.to_screen('')
1447             raise
1448
1449     @__handle_extraction_exceptions
1450     def __extract_info(self, url, ie, download, extra_info, process):
1451         ie_result = ie.extract(url)
1452         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1453             return
1454         if isinstance(ie_result, list):
1455             # Backwards compatibility: old IE result format
1456             ie_result = {
1457                 '_type': 'compat_list',
1458                 'entries': ie_result,
1459             }
1460         if extra_info.get('original_url'):
1461             ie_result.setdefault('original_url', extra_info['original_url'])
1462         self.add_default_extra_info(ie_result, ie, url)
1463         if process:
1464             self._wait_for_video(ie_result)
1465             return self.process_ie_result(ie_result, download, extra_info)
1466         else:
1467             return ie_result
1468
1469     def add_default_extra_info(self, ie_result, ie, url):
1470         if url is not None:
1471             self.add_extra_info(ie_result, {
1472                 'webpage_url': url,
1473                 'original_url': url,
1474                 'webpage_url_basename': url_basename(url),
1475                 'webpage_url_domain': get_domain(url),
1476             })
1477         if ie is not None:
1478             self.add_extra_info(ie_result, {
1479                 'extractor': ie.IE_NAME,
1480                 'extractor_key': ie.ie_key(),
1481             })
1482
1483     def process_ie_result(self, ie_result, download=True, extra_info=None):
1484         """
1485         Take the result of the ie(may be modified) and resolve all unresolved
1486         references (URLs, playlist items).
1487
1488         It will also download the videos if 'download'.
1489         Returns the resolved ie_result.
1490         """
1491         if extra_info is None:
1492             extra_info = {}
1493         result_type = ie_result.get('_type', 'video')
1494
1495         if result_type in ('url', 'url_transparent'):
1496             ie_result['url'] = sanitize_url(ie_result['url'])
1497             if ie_result.get('original_url'):
1498                 extra_info.setdefault('original_url', ie_result['original_url'])
1499
1500             extract_flat = self.params.get('extract_flat', False)
1501             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1502                     or extract_flat is True):
1503                 info_copy = ie_result.copy()
1504                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1505                 if ie and not ie_result.get('id'):
1506                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1507                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1508                 self.add_extra_info(info_copy, extra_info)
1509                 info_copy, _ = self.pre_process(info_copy)
1510                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1511                 if self.params.get('force_write_download_archive', False):
1512                     self.record_download_archive(info_copy)
1513                 return ie_result
1514
1515         if result_type == 'video':
1516             self.add_extra_info(ie_result, extra_info)
1517             ie_result = self.process_video_result(ie_result, download=download)
1518             additional_urls = (ie_result or {}).get('additional_urls')
1519             if additional_urls:
1520                 # TODO: Improve MetadataParserPP to allow setting a list
1521                 if isinstance(additional_urls, compat_str):
1522                     additional_urls = [additional_urls]
1523                 self.to_screen(
1524                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1525                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1526                 ie_result['additional_entries'] = [
1527                     self.extract_info(
1528                         url, download, extra_info=extra_info,
1529                         force_generic_extractor=self.params.get('force_generic_extractor'))
1530                     for url in additional_urls
1531                 ]
1532             return ie_result
1533         elif result_type == 'url':
1534             # We have to add extra_info to the results because it may be
1535             # contained in a playlist
1536             return self.extract_info(
1537                 ie_result['url'], download,
1538                 ie_key=ie_result.get('ie_key'),
1539                 extra_info=extra_info)
1540         elif result_type == 'url_transparent':
1541             # Use the information from the embedding page
1542             info = self.extract_info(
1543                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1544                 extra_info=extra_info, download=False, process=False)
1545
1546             # extract_info may return None when ignoreerrors is enabled and
1547             # extraction failed with an error, don't crash and return early
1548             # in this case
1549             if not info:
1550                 return info
1551
1552             force_properties = dict(
1553                 (k, v) for k, v in ie_result.items() if v is not None)
1554             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1555                 if f in force_properties:
1556                     del force_properties[f]
1557             new_result = info.copy()
1558             new_result.update(force_properties)
1559
1560             # Extracted info may not be a video result (i.e.
1561             # info.get('_type', 'video') != video) but rather an url or
1562             # url_transparent. In such cases outer metadata (from ie_result)
1563             # should be propagated to inner one (info). For this to happen
1564             # _type of info should be overridden with url_transparent. This
1565             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1566             if new_result.get('_type') == 'url':
1567                 new_result['_type'] = 'url_transparent'
1568
1569             return self.process_ie_result(
1570                 new_result, download=download, extra_info=extra_info)
1571         elif result_type in ('playlist', 'multi_video'):
1572             # Protect from infinite recursion due to recursively nested playlists
1573             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1574             webpage_url = ie_result['webpage_url']
1575             if webpage_url in self._playlist_urls:
1576                 self.to_screen(
1577                     '[download] Skipping already downloaded playlist: %s'
1578                     % ie_result.get('title') or ie_result.get('id'))
1579                 return
1580
1581             self._playlist_level += 1
1582             self._playlist_urls.add(webpage_url)
1583             self._sanitize_thumbnails(ie_result)
1584             try:
1585                 return self.__process_playlist(ie_result, download)
1586             finally:
1587                 self._playlist_level -= 1
1588                 if not self._playlist_level:
1589                     self._playlist_urls.clear()
1590         elif result_type == 'compat_list':
1591             self.report_warning(
1592                 'Extractor %s returned a compat_list result. '
1593                 'It needs to be updated.' % ie_result.get('extractor'))
1594
1595             def _fixup(r):
1596                 self.add_extra_info(r, {
1597                     'extractor': ie_result['extractor'],
1598                     'webpage_url': ie_result['webpage_url'],
1599                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1600                     'webpage_url_domain': get_domain(ie_result['webpage_url']),
1601                     'extractor_key': ie_result['extractor_key'],
1602                 })
1603                 return r
1604             ie_result['entries'] = [
1605                 self.process_ie_result(_fixup(r), download, extra_info)
1606                 for r in ie_result['entries']
1607             ]
1608             return ie_result
1609         else:
1610             raise Exception('Invalid result type: %s' % result_type)
1611
1612     def _ensure_dir_exists(self, path):
1613         return make_dir(path, self.report_error)
1614
1615     @staticmethod
1616     def _playlist_infodict(ie_result, **kwargs):
1617         return {
1618             **ie_result,
1619             'playlist': ie_result.get('title') or ie_result.get('id'),
1620             'playlist_id': ie_result.get('id'),
1621             'playlist_title': ie_result.get('title'),
1622             'playlist_uploader': ie_result.get('uploader'),
1623             'playlist_uploader_id': ie_result.get('uploader_id'),
1624             'playlist_index': 0,
1625             **kwargs,
1626         }
1627
    def __process_playlist(self, ie_result, download):
        """
        Process a playlist result: select the requested entries, write the
        playlist-level files and then process every selected entry.

        Returns the ie_result as returned by the 'playlist' postprocessors,
        with 'entries' replaced by the processed entries. Returns None if
        writing the playlist infojson or description returned None.
        Raises EntryNotInPlaylist if ie_result has no 'entries', or if an
        entry of a partially extracted playlist cannot be found.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Sentinel marking the positions for which no entry is available
        MissingEntry = object()
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            # 'entries' only holds the entries listed in 'requested_entries'
            # (1-based indices); put each one back at its own position
            def fill_missing_entries(entries, indices):
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            # Expand a specification such as "1,3-5" into 1, 3, 4, 5
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            playlist_count = len(ie_entries)
            # The remaining %d is filled in later with the number of selected entries
            msg = f'Collected {playlist_count} videos; downloading %d of them'
            ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count

            def get_entry(i):
                return ie_entries[i - 1]
        else:
            msg = 'Downloading %d videos'
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)
            elif isinstance(ie_entries, InAdvancePagedList):
                if ie_entries._pagesize == 1:
                    # NOTE(review): this value is not read again in this method
                    playlist_count = ie_entries._pagecount

            # Fetching an entry is wrapped with the extraction error handling
            def get_entry(i):
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        # Collect the requested entries; `broken` records that a break-on-*
        # option ended the collection early
        entries, broken = [], False
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    break
            # A None placeholder is appended for an entry that could not be
            # obtained, which keeps the positions aligned with `playlistitems`
            entries.append(entry)
            try:
                if entry is not None:
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                broken = True
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        # Only take the number of collected entries as the playlist size when
        # the size is not known yet and the collection was not restricted
        if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
            ie_result['playlist_count'] = n_entries

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            # A None result from the writers aborts the processing of the playlist
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            # With this compat option the index is derived from the position
            # in the (possibly re-ordered) list instead of the saved index
            if 'playlist-index' in self.params.get('compat_opts', []):
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # Playlist-level fields passed along with every entry
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_count': ie_result.get('playlist_count'),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Skip the entries that are rejected by the filters or the archive
            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # A falsy result is counted as a failed entry
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return

        ie_result = self.run_all_pps('playlist', ie_result)
        self.to_screen(f'[download] Finished downloading playlist: {playlist}')
        return ie_result
1803
1804     @__handle_extraction_exceptions
1805     def __process_iterable_entry(self, entry, download, extra_info):
1806         return self.process_ie_result(
1807             entry, download=download, extra_info=extra_info)
1808
    def _build_format_filter(self, filter_spec):
        """
        Returns a function to filter the formats according to the filter_spec

        Two kinds of specification are recognised, tried in this order:
        a numeric comparison (e.g. "height<=720") on one of the numeric keys
        listed in the first pattern below, and a string comparison
        (e.g. "ext=mp4", "format_id!^=hls") on any key.
        A "?" after the operator makes the formats that lack the key pass.

        The returned function takes a format dict and returns a truthy value
        if the format passes. Raises ValueError if the value of a numeric
        comparison cannot be parsed, and SyntaxError if filter_spec matches
        neither kind of specification.
        """

        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
            (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.fullmatch(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                # Not a plain integer: try to parse it as a file size, first
                # as it is and then with a "B" appended
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            # Fall back to a string comparison
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)\s*
                (?P<key>[a-zA-Z0-9._-]+)\s*
                (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
                (?P<value>[a-zA-Z0-9._-]+)\s*
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.fullmatch(filter_spec)
            if m:
                comparison_value = m.group('value')
                str_op = STR_OPERATORS[m.group('op')]
                # A "!" before the operator inverts the result of the comparison
                if m.group('negation'):
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                # The matched "?" text (truthy) if given, else None (falsy)
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter
1869
1870     def _check_formats(self, formats):
1871         for f in formats:
1872             self.to_screen('[info] Testing format %s' % f['format_id'])
1873             path = self.get_output_path('temp')
1874             if not self._ensure_dir_exists(f'{path}/'):
1875                 continue
1876             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1877             temp_file.close()
1878             try:
1879                 success, _ = self.dl(temp_file.name, f, test=True)
1880             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1881                 success = False
1882             finally:
1883                 if os.path.exists(temp_file.name):
1884                     try:
1885                         os.remove(temp_file.name)
1886                     except OSError:
1887                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1888             if success:
1889                 yield f
1890             else:
1891                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1892
1893     def _default_format_spec(self, info_dict, download=True):
1894
1895         def can_merge():
1896             merger = FFmpegMergerPP(self)
1897             return merger.available and merger.can_merge()
1898
1899         prefer_best = (
1900             not self.params.get('simulate')
1901             and download
1902             and (
1903                 not can_merge()
1904                 or info_dict.get('is_live', False)
1905                 or self.outtmpl_dict['default'] == '-'))
1906         compat = (
1907             prefer_best
1908             or self.params.get('allow_multiple_audio_streams', False)
1909             or 'format-spec' in self.params.get('compat_opts', []))
1910
1911         return (
1912             'best/bestvideo+bestaudio' if prefer_best
1913             else 'bestvideo*+bestaudio/best' if not compat
1914             else 'bestvideo+bestaudio/best')
1915
1916     def build_format_selector(self, format_spec):
1917         def syntax_error(note, start):
1918             message = (
1919                 'Invalid format specification: '
1920                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1921             return SyntaxError(message)
1922
1923         PICKFIRST = 'PICKFIRST'
1924         MERGE = 'MERGE'
1925         SINGLE = 'SINGLE'
1926         GROUP = 'GROUP'
1927         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1928
1929         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1930                                   'video': self.params.get('allow_multiple_video_streams', False)}
1931
1932         check_formats = self.params.get('check_formats') == 'selected'
1933
1934         def _parse_filter(tokens):
1935             filter_parts = []
1936             for type, string, start, _, _ in tokens:
1937                 if type == tokenize.OP and string == ']':
1938                     return ''.join(filter_parts)
1939                 else:
1940                     filter_parts.append(string)
1941
1942         def _remove_unused_ops(tokens):
1943             # Remove operators that we don't use and join them with the surrounding strings
1944             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1945             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1946             last_string, last_start, last_end, last_line = None, None, None, None
1947             for type, string, start, end, line in tokens:
1948                 if type == tokenize.OP and string == '[':
1949                     if last_string:
1950                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1951                         last_string = None
1952                     yield type, string, start, end, line
1953                     # everything inside brackets will be handled by _parse_filter
1954                     for type, string, start, end, line in tokens:
1955                         yield type, string, start, end, line
1956                         if type == tokenize.OP and string == ']':
1957                             break
1958                 elif type == tokenize.OP and string in ALLOWED_OPS:
1959                     if last_string:
1960                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1961                         last_string = None
1962                     yield type, string, start, end, line
1963                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1964                     if not last_string:
1965                         last_string = string
1966                         last_start = start
1967                         last_end = end
1968                     else:
1969                         last_string += string
1970             if last_string:
1971                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1972
1973         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1974             selectors = []
1975             current_selector = None
1976             for type, string, start, _, _ in tokens:
1977                 # ENCODING is only defined in python 3.x
1978                 if type == getattr(tokenize, 'ENCODING', None):
1979                     continue
1980                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1981                     current_selector = FormatSelector(SINGLE, string, [])
1982                 elif type == tokenize.OP:
1983                     if string == ')':
1984                         if not inside_group:
1985                             # ')' will be handled by the parentheses group
1986                             tokens.restore_last_token()
1987                         break
1988                     elif inside_merge and string in ['/', ',']:
1989                         tokens.restore_last_token()
1990                         break
1991                     elif inside_choice and string == ',':
1992                         tokens.restore_last_token()
1993                         break
1994                     elif string == ',':
1995                         if not current_selector:
1996                             raise syntax_error('"," must follow a format selector', start)
1997                         selectors.append(current_selector)
1998                         current_selector = None
1999                     elif string == '/':
2000                         if not current_selector:
2001                             raise syntax_error('"/" must follow a format selector', start)
2002                         first_choice = current_selector
2003                         second_choice = _parse_format_selection(tokens, inside_choice=True)
2004                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2005                     elif string == '[':
2006                         if not current_selector:
2007                             current_selector = FormatSelector(SINGLE, 'best', [])
2008                         format_filter = _parse_filter(tokens)
2009                         current_selector.filters.append(format_filter)
2010                     elif string == '(':
2011                         if current_selector:
2012                             raise syntax_error('Unexpected "("', start)
2013                         group = _parse_format_selection(tokens, inside_group=True)
2014                         current_selector = FormatSelector(GROUP, group, [])
2015                     elif string == '+':
2016                         if not current_selector:
2017                             raise syntax_error('Unexpected "+"', start)
2018                         selector_1 = current_selector
2019                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
2020                         if not selector_2:
2021                             raise syntax_error('Expected a selector', start)
2022                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2023                     else:
2024                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2025                 elif type == tokenize.ENDMARKER:
2026                     break
2027             if current_selector:
2028                 selectors.append(current_selector)
2029             return selectors
2030
2031         def _merge(formats_pair):
2032             format_1, format_2 = formats_pair
2033
2034             formats_info = []
2035             formats_info.extend(format_1.get('requested_formats', (format_1,)))
2036             formats_info.extend(format_2.get('requested_formats', (format_2,)))
2037
2038             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2039                 get_no_more = {'video': False, 'audio': False}
2040                 for (i, fmt_info) in enumerate(formats_info):
2041                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2042                         formats_info.pop(i)
2043                         continue
2044                     for aud_vid in ['audio', 'video']:
2045                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2046                             if get_no_more[aud_vid]:
2047                                 formats_info.pop(i)
2048                                 break
2049                             get_no_more[aud_vid] = True
2050
2051             if len(formats_info) == 1:
2052                 return formats_info[0]
2053
2054             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2055             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2056
2057             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2058             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2059
2060             output_ext = self.params.get('merge_output_format')
2061             if not output_ext:
2062                 if the_only_video:
2063                     output_ext = the_only_video['ext']
2064                 elif the_only_audio and not video_fmts:
2065                     output_ext = the_only_audio['ext']
2066                 else:
2067                     output_ext = 'mkv'
2068
2069             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2070
2071             new_dict = {
2072                 'requested_formats': formats_info,
2073                 'format': '+'.join(filtered('format')),
2074                 'format_id': '+'.join(filtered('format_id')),
2075                 'ext': output_ext,
2076                 'protocol': '+'.join(map(determine_protocol, formats_info)),
2077                 'language': '+'.join(orderedSet(filtered('language'))) or None,
2078                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2079                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2080                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2081             }
2082
2083             if the_only_video:
2084                 new_dict.update({
2085                     'width': the_only_video.get('width'),
2086                     'height': the_only_video.get('height'),
2087                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2088                     'fps': the_only_video.get('fps'),
2089                     'dynamic_range': the_only_video.get('dynamic_range'),
2090                     'vcodec': the_only_video.get('vcodec'),
2091                     'vbr': the_only_video.get('vbr'),
2092                     'stretched_ratio': the_only_video.get('stretched_ratio'),
2093                 })
2094
2095             if the_only_audio:
2096                 new_dict.update({
2097                     'acodec': the_only_audio.get('acodec'),
2098                     'abr': the_only_audio.get('abr'),
2099                     'asr': the_only_audio.get('asr'),
2100                 })
2101
2102             return new_dict
2103
2104         def _check_formats(formats):
2105             if not check_formats:
2106                 yield from formats
2107                 return
2108             yield from self._check_formats(formats)
2109
2110         def _build_selector_function(selector):
2111             if isinstance(selector, list):  # ,
2112                 fs = [_build_selector_function(s) for s in selector]
2113
2114                 def selector_function(ctx):
2115                     for f in fs:
2116                         yield from f(ctx)
2117                 return selector_function
2118
2119             elif selector.type == GROUP:  # ()
2120                 selector_function = _build_selector_function(selector.selector)
2121
2122             elif selector.type == PICKFIRST:  # /
2123                 fs = [_build_selector_function(s) for s in selector.selector]
2124
2125                 def selector_function(ctx):
2126                     for f in fs:
2127                         picked_formats = list(f(ctx))
2128                         if picked_formats:
2129                             return picked_formats
2130                     return []
2131
2132             elif selector.type == MERGE:  # +
2133                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2134
2135                 def selector_function(ctx):
2136                     for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2137                         yield _merge(pair)
2138
2139             elif selector.type == SINGLE:  # atom
2140                 format_spec = selector.selector or 'best'
2141
2142                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2143                 if format_spec == 'all':
2144                     def selector_function(ctx):
2145                         yield from _check_formats(ctx['formats'][::-1])
2146                 elif format_spec == 'mergeall':
2147                     def selector_function(ctx):
2148                         formats = list(_check_formats(ctx['formats']))
2149                         if not formats:
2150                             return
2151                         merged_format = formats[-1]
2152                         for f in formats[-2::-1]:
2153                             merged_format = _merge((merged_format, f))
2154                         yield merged_format
2155
2156                 else:
2157                     format_fallback, format_reverse, format_idx = False, True, 1
2158                     mobj = re.match(
2159                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2160                         format_spec)
2161                     if mobj is not None:
2162                         format_idx = int_or_none(mobj.group('n'), default=1)
2163                         format_reverse = mobj.group('bw')[0] == 'b'
2164                         format_type = (mobj.group('type') or [None])[0]
2165                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2166                         format_modified = mobj.group('mod') is not None
2167
2168                         format_fallback = not format_type and not format_modified  # for b, w
2169                         _filter_f = (
2170                             (lambda f: f.get('%scodec' % format_type) != 'none')
2171                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2172                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2173                             if format_type  # bv, ba, wv, wa
2174                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2175                             if not format_modified  # b, w
2176                             else lambda f: True)  # b*, w*
2177                         filter_f = lambda f: _filter_f(f) and (
2178                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2179                     else:
2180                         if format_spec in self._format_selection_exts['audio']:
2181                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2182                         elif format_spec in self._format_selection_exts['video']:
2183                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2184                         elif format_spec in self._format_selection_exts['storyboards']:
2185                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2186                         else:
2187                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2188
2189                     def selector_function(ctx):
2190                         formats = list(ctx['formats'])
2191                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2192                         if format_fallback and ctx['incomplete_formats'] and not matches:
2193                             # for extractors with incomplete formats (audio only (soundcloud)
2194                             # or video only (imgur)) best/worst will fallback to
2195                             # best/worst {video,audio}-only format
2196                             matches = formats
2197                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2198                         try:
2199                             yield matches[format_idx - 1]
2200                         except IndexError:
2201                             return
2202
2203             filters = [self._build_format_filter(f) for f in selector.filters]
2204
2205             def final_selector(ctx):
2206                 ctx_copy = dict(ctx)
2207                 for _filter in filters:
2208                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2209                 return selector_function(ctx_copy)
2210             return final_selector
2211
2212         stream = io.BytesIO(format_spec.encode('utf-8'))
2213         try:
2214             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2215         except tokenize.TokenError:
2216             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2217
2218         class TokenIterator(object):
2219             def __init__(self, tokens):
2220                 self.tokens = tokens
2221                 self.counter = 0
2222
2223             def __iter__(self):
2224                 return self
2225
2226             def __next__(self):
2227                 if self.counter >= len(self.tokens):
2228                     raise StopIteration()
2229                 value = self.tokens[self.counter]
2230                 self.counter += 1
2231                 return value
2232
2233             next = __next__
2234
2235             def restore_last_token(self):
2236                 self.counter -= 1
2237
2238         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2239         return _build_selector_function(parsed_selector)
2240
2241     def _calc_headers(self, info_dict):
2242         res = std_headers.copy()
2243
2244         add_headers = info_dict.get('http_headers')
2245         if add_headers:
2246             res.update(add_headers)
2247
2248         cookies = self._calc_cookies(info_dict)
2249         if cookies:
2250             res['Cookie'] = cookies
2251
2252         if 'X-Forwarded-For' not in res:
2253             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2254             if x_forwarded_for_ip:
2255                 res['X-Forwarded-For'] = x_forwarded_for_ip
2256
2257         return res
2258
2259     def _calc_cookies(self, info_dict):
2260         pr = sanitized_Request(info_dict['url'])
2261         self.cookiejar.add_cookie_header(pr)
2262         return pr.get_header('Cookie')
2263
2264     def _sort_thumbnails(self, thumbnails):
2265         thumbnails.sort(key=lambda t: (
2266             t.get('preference') if t.get('preference') is not None else -1,
2267             t.get('width') if t.get('width') is not None else -1,
2268             t.get('height') if t.get('height') is not None else -1,
2269             t.get('id') if t.get('id') is not None else '',
2270             t.get('url')))
2271
2272     def _sanitize_thumbnails(self, info_dict):
2273         thumbnails = info_dict.get('thumbnails')
2274         if thumbnails is None:
2275             thumbnail = info_dict.get('thumbnail')
2276             if thumbnail:
2277                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2278         if not thumbnails:
2279             return
2280
2281         def check_thumbnails(thumbnails):
2282             for t in thumbnails:
2283                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2284                 try:
2285                     self.urlopen(HEADRequest(t['url']))
2286                 except network_exceptions as err:
2287                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2288                     continue
2289                 yield t
2290
2291         self._sort_thumbnails(thumbnails)
2292         for i, t in enumerate(thumbnails):
2293             if t.get('id') is None:
2294                 t['id'] = '%d' % i
2295             if t.get('width') and t.get('height'):
2296                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2297             t['url'] = sanitize_url(t['url'])
2298
2299         if self.params.get('check_formats') is True:
2300             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2301         else:
2302             info_dict['thumbnails'] = thumbnails
2303
2304     def process_video_result(self, info_dict, download=True):
2305         assert info_dict.get('_type', 'video') == 'video'
2306         self._num_videos += 1
2307
2308         if 'id' not in info_dict:
2309             raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2310         elif not info_dict.get('id'):
2311             raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2312         if 'title' not in info_dict:
2313             raise ExtractorError('Missing "title" field in extractor result',
2314                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2315         elif not info_dict.get('title'):
2316             self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2317             info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2318
2319         def report_force_conversion(field, field_not, conversion):
2320             self.report_warning(
2321                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2322                 % (field, field_not, conversion))
2323
2324         def sanitize_string_field(info, string_field):
2325             field = info.get(string_field)
2326             if field is None or isinstance(field, compat_str):
2327                 return
2328             report_force_conversion(string_field, 'a string', 'string')
2329             info[string_field] = compat_str(field)
2330
2331         def sanitize_numeric_fields(info):
2332             for numeric_field in self._NUMERIC_FIELDS:
2333                 field = info.get(numeric_field)
2334                 if field is None or isinstance(field, compat_numeric_types):
2335                     continue
2336                 report_force_conversion(numeric_field, 'numeric', 'int')
2337                 info[numeric_field] = int_or_none(field)
2338
2339         sanitize_string_field(info_dict, 'id')
2340         sanitize_numeric_fields(info_dict)
2341
2342         if 'playlist' not in info_dict:
2343             # It isn't part of a playlist
2344             info_dict['playlist'] = None
2345             info_dict['playlist_index'] = None
2346
2347         self._sanitize_thumbnails(info_dict)
2348
2349         thumbnail = info_dict.get('thumbnail')
2350         thumbnails = info_dict.get('thumbnails')
2351         if thumbnail:
2352             info_dict['thumbnail'] = sanitize_url(thumbnail)
2353         elif thumbnails:
2354             info_dict['thumbnail'] = thumbnails[-1]['url']
2355
2356         if info_dict.get('display_id') is None and 'id' in info_dict:
2357             info_dict['display_id'] = info_dict['id']
2358
2359         if info_dict.get('duration') is not None:
2360             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2361
2362         for ts_key, date_key in (
2363                 ('timestamp', 'upload_date'),
2364                 ('release_timestamp', 'release_date'),
2365                 ('modified_timestamp', 'modified_date'),
2366         ):
2367             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2368                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2369                 # see http://bugs.python.org/issue1646728)
2370                 try:
2371                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2372                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2373                 except (ValueError, OverflowError, OSError):
2374                     pass
2375
2376         live_keys = ('is_live', 'was_live')
2377         live_status = info_dict.get('live_status')
2378         if live_status is None:
2379             for key in live_keys:
2380                 if info_dict.get(key) is False:
2381                     continue
2382                 if info_dict.get(key):
2383                     live_status = key
2384                 break
2385             if all(info_dict.get(key) is False for key in live_keys):
2386                 live_status = 'not_live'
2387         if live_status:
2388             info_dict['live_status'] = live_status
2389             for key in live_keys:
2390                 if info_dict.get(key) is None:
2391                     info_dict[key] = (live_status == key)
2392
2393         # Auto generate title fields corresponding to the *_number fields when missing
2394         # in order to always have clean titles. This is very common for TV series.
2395         for field in ('chapter', 'season', 'episode'):
2396             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2397                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2398
2399         for cc_kind in ('subtitles', 'automatic_captions'):
2400             cc = info_dict.get(cc_kind)
2401             if cc:
2402                 for _, subtitle in cc.items():
2403                     for subtitle_format in subtitle:
2404                         if subtitle_format.get('url'):
2405                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2406                         if subtitle_format.get('ext') is None:
2407                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2408
2409         automatic_captions = info_dict.get('automatic_captions')
2410         subtitles = info_dict.get('subtitles')
2411
2412         info_dict['requested_subtitles'] = self.process_subtitles(
2413             info_dict['id'], subtitles, automatic_captions)
2414
2415         if info_dict.get('formats') is None:
2416             # There's only one format available
2417             formats = [info_dict]
2418         else:
2419             formats = info_dict['formats']
2420
2421         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2422         if not self.params.get('allow_unplayable_formats'):
2423             formats = [f for f in formats if not f.get('has_drm')]
2424
2425         # backward compatibility
2426         info_dict['fulltitle'] = info_dict['title']
2427
2428         if info_dict.get('is_live'):
2429             get_from_start = bool(self.params.get('live_from_start'))
2430             formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2431             if not get_from_start:
2432                 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2433
2434         if not formats:
2435             self.raise_no_formats(info_dict)
2436
2437         def is_wellformed(f):
2438             url = f.get('url')
2439             if not url:
2440                 self.report_warning(
2441                     '"url" field is missing or empty - skipping format, '
2442                     'there is an error in extractor')
2443                 return False
2444             if isinstance(url, bytes):
2445                 sanitize_string_field(f, 'url')
2446             return True
2447
2448         # Filter out malformed formats for better extraction robustness
2449         formats = list(filter(is_wellformed, formats))
2450
2451         formats_dict = {}
2452
2453         # We check that all the formats have the format and format_id fields
2454         for i, format in enumerate(formats):
2455             sanitize_string_field(format, 'format_id')
2456             sanitize_numeric_fields(format)
2457             format['url'] = sanitize_url(format['url'])
2458             if not format.get('format_id'):
2459                 format['format_id'] = compat_str(i)
2460             else:
2461                 # Sanitize format_id from characters used in format selector expression
2462                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2463             format_id = format['format_id']
2464             if format_id not in formats_dict:
2465                 formats_dict[format_id] = []
2466             formats_dict[format_id].append(format)
2467
2468         # Make sure all formats have unique format_id
2469         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2470         for format_id, ambiguous_formats in formats_dict.items():
2471             ambigious_id = len(ambiguous_formats) > 1
2472             for i, format in enumerate(ambiguous_formats):
2473                 if ambigious_id:
2474                     format['format_id'] = '%s-%d' % (format_id, i)
2475                 if format.get('ext') is None:
2476                     format['ext'] = determine_ext(format['url']).lower()
2477                 # Ensure there is no conflict between id and ext in format selection
2478                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2479                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2480                     format['format_id'] = 'f%s' % format['format_id']
2481
2482         for i, format in enumerate(formats):
2483             if format.get('format') is None:
2484                 format['format'] = '{id} - {res}{note}'.format(
2485                     id=format['format_id'],
2486                     res=self.format_resolution(format),
2487                     note=format_field(format, 'format_note', ' (%s)'),
2488                 )
2489             if format.get('protocol') is None:
2490                 format['protocol'] = determine_protocol(format)
2491             if format.get('resolution') is None:
2492                 format['resolution'] = self.format_resolution(format, default=None)
2493             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2494                 format['dynamic_range'] = 'SDR'
2495             if (info_dict.get('duration') and format.get('tbr')
2496                     and not format.get('filesize') and not format.get('filesize_approx')):
2497                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2498
2499             # Add HTTP headers, so that external programs can use them from the
2500             # json output
2501             full_format_info = info_dict.copy()
2502             full_format_info.update(format)
2503             format['http_headers'] = self._calc_headers(full_format_info)
2504         # Remove private housekeeping stuff
2505         if '__x_forwarded_for_ip' in info_dict:
2506             del info_dict['__x_forwarded_for_ip']
2507
2508         # TODO Central sorting goes here
2509
2510         if self.params.get('check_formats') is True:
2511             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2512
2513         if not formats or formats[0] is not info_dict:
2514             # only set the 'formats' fields if the original info_dict list them
2515             # otherwise we end up with a circular reference, the first (and unique)
2516             # element in the 'formats' field in info_dict is info_dict itself,
2517             # which can't be exported to json
2518             info_dict['formats'] = formats
2519
2520         info_dict, _ = self.pre_process(info_dict)
2521
2522         # The pre-processors may have modified the formats
2523         formats = info_dict.get('formats', [info_dict])
2524
2525         list_only = self.params.get('simulate') is None and (
2526             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2527         interactive_format_selection = not list_only and self.format_selector == '-'
2528         if self.params.get('list_thumbnails'):
2529             self.list_thumbnails(info_dict)
2530         if self.params.get('listsubtitles'):
2531             if 'automatic_captions' in info_dict:
2532                 self.list_subtitles(
2533                     info_dict['id'], automatic_captions, 'automatic captions')
2534             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2535         if self.params.get('listformats') or interactive_format_selection:
2536             self.list_formats(info_dict)
2537         if list_only:
2538             # Without this printing, -F --print-json will not work
2539             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2540             return
2541
2542         format_selector = self.format_selector
2543         if format_selector is None:
2544             req_format = self._default_format_spec(info_dict, download=download)
2545             self.write_debug('Default format spec: %s' % req_format)
2546             format_selector = self.build_format_selector(req_format)
2547
2548         while True:
2549             if interactive_format_selection:
2550                 req_format = input(
2551                     self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2552                 try:
2553                     format_selector = self.build_format_selector(req_format)
2554                 except SyntaxError as err:
2555                     self.report_error(err, tb=False, is_error=False)
2556                     continue
2557
2558             # While in format selection we may need to have an access to the original
2559             # format set in order to calculate some metrics or do some processing.
2560             # For now we need to be able to guess whether original formats provided
2561             # by extractor are incomplete or not (i.e. whether extractor provides only
2562             # video-only or audio-only formats) for proper formats selection for
2563             # extractors with such incomplete formats (see
2564             # https://github.com/ytdl-org/youtube-dl/pull/5556).
2565             # Since formats may be filtered during format selection and may not match
2566             # the original formats the results may be incorrect. Thus original formats
2567             # or pre-calculated metrics should be passed to format selection routines
2568             # as well.
2569             # We will pass a context object containing all necessary additional data
2570             # instead of just formats.
2571             # This fixes incorrect format selection issue (see
2572             # https://github.com/ytdl-org/youtube-dl/issues/10083).
2573             incomplete_formats = (
2574                 # All formats are video-only or
2575                 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2576                 # all formats are audio-only
2577                 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2578
2579             ctx = {
2580                 'formats': formats,
2581                 'incomplete_formats': incomplete_formats,
2582             }
2583
2584             formats_to_download = list(format_selector(ctx))
2585             if interactive_format_selection and not formats_to_download:
2586                 self.report_error('Requested format is not available', tb=False, is_error=False)
2587                 continue
2588             break
2589
2590         if not formats_to_download:
2591             if not self.params.get('ignore_no_formats_error'):
2592                 raise ExtractorError('Requested format is not available', expected=True,
2593                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2594             self.report_warning('Requested format is not available')
2595             # Process what we can, even without any available formats.
2596             formats_to_download = [{}]
2597
2598         best_format = formats_to_download[-1]
2599         if download:
2600             if best_format:
2601                 self.to_screen(
2602                     f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2603                     + ', '.join([f['format_id'] for f in formats_to_download]))
2604             max_downloads_reached = False
2605             for i, fmt in enumerate(formats_to_download):
2606                 formats_to_download[i] = new_info = dict(info_dict)
2607                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2608                 new_info.update(fmt)
2609                 new_info['__original_infodict'] = info_dict
2610                 try:
2611                     self.process_info(new_info)
2612                 except MaxDownloadsReached:
2613                     max_downloads_reached = True
2614                 new_info.pop('__original_infodict')
2615                 # Remove copied info
2616                 for key, val in tuple(new_info.items()):
2617                     if info_dict.get(key) == val:
2618                         new_info.pop(key)
2619                 if max_downloads_reached:
2620                     break
2621
2622             write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2623             assert write_archive.issubset({True, False, 'ignore'})
2624             if True in write_archive and False not in write_archive:
2625                 self.record_download_archive(info_dict)
2626
2627             info_dict['requested_downloads'] = formats_to_download
2628             info_dict = self.run_all_pps('after_video', info_dict)
2629             if max_downloads_reached:
2630                 raise MaxDownloadsReached()
2631
2632         # We update the info dict with the selected best quality format (backwards compatibility)
2633         info_dict.update(best_format)
2634         return info_dict
2635
2636     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2637         """Select the requested subtitles and their format"""
2638         available_subs = {}
2639         if normal_subtitles and self.params.get('writesubtitles'):
2640             available_subs.update(normal_subtitles)
2641         if automatic_captions and self.params.get('writeautomaticsub'):
2642             for lang, cap_info in automatic_captions.items():
2643                 if lang not in available_subs:
2644                     available_subs[lang] = cap_info
2645
2646         if (not self.params.get('writesubtitles') and not
2647                 self.params.get('writeautomaticsub') or not
2648                 available_subs):
2649             return None
2650
2651         all_sub_langs = available_subs.keys()
2652         if self.params.get('allsubtitles', False):
2653             requested_langs = all_sub_langs
2654         elif self.params.get('subtitleslangs', False):
2655             # A list is used so that the order of languages will be the same as
2656             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2657             requested_langs = []
2658             for lang_re in self.params.get('subtitleslangs'):
2659                 if lang_re == 'all':
2660                     requested_langs.extend(all_sub_langs)
2661                     continue
2662                 discard = lang_re[0] == '-'
2663                 if discard:
2664                     lang_re = lang_re[1:]
2665                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2666                 if discard:
2667                     for lang in current_langs:
2668                         while lang in requested_langs:
2669                             requested_langs.remove(lang)
2670                 else:
2671                     requested_langs.extend(current_langs)
2672             requested_langs = orderedSet(requested_langs)
2673         elif 'en' in available_subs:
2674             requested_langs = ['en']
2675         else:
2676             requested_langs = [list(all_sub_langs)[0]]
2677         if requested_langs:
2678             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2679
2680         formats_query = self.params.get('subtitlesformat', 'best')
2681         formats_preference = formats_query.split('/') if formats_query else []
2682         subs = {}
2683         for lang in requested_langs:
2684             formats = available_subs.get(lang)
2685             if formats is None:
2686                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2687                 continue
2688             for ext in formats_preference:
2689                 if ext == 'best':
2690                     f = formats[-1]
2691                     break
2692                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2693                 if matches:
2694                     f = matches[-1]
2695                     break
2696             else:
2697                 f = formats[-1]
2698                 self.report_warning(
2699                     'No subtitle format found matching "%s" for language %s, '
2700                     'using %s' % (formats_query, lang, f['ext']))
2701             subs[lang] = f
2702         return subs
2703
2704     def _forceprint(self, key, info_dict):
2705         if info_dict is None:
2706             return
2707         info_copy = info_dict.copy()
2708         info_copy['formats_table'] = self.render_formats_table(info_dict)
2709         info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2710         info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2711         info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2712
2713         def format_tmpl(tmpl):
2714             mobj = re.match(r'\w+(=?)$', tmpl)
2715             if mobj and mobj.group(1):
2716                 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2717             elif mobj:
2718                 return f'%({tmpl})s'
2719             return tmpl
2720
2721         for tmpl in self.params['forceprint'].get(key, []):
2722             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2723
2724         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2725             filename = self.evaluate_outtmpl(file_tmpl, info_dict)
2726             tmpl = format_tmpl(tmpl)
2727             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2728             with io.open(filename, 'a', encoding='utf-8') as f:
2729                 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2730
2731     def __forced_printings(self, info_dict, filename, incomplete):
2732         def print_mandatory(field, actual_field=None):
2733             if actual_field is None:
2734                 actual_field = field
2735             if (self.params.get('force%s' % field, False)
2736                     and (not incomplete or info_dict.get(actual_field) is not None)):
2737                 self.to_stdout(info_dict[actual_field])
2738
2739         def print_optional(field):
2740             if (self.params.get('force%s' % field, False)
2741                     and info_dict.get(field) is not None):
2742                 self.to_stdout(info_dict[field])
2743
2744         info_dict = info_dict.copy()
2745         if filename is not None:
2746             info_dict['filename'] = filename
2747         if info_dict.get('requested_formats') is not None:
2748             # For RTMP URLs, also include the playpath
2749             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2750         elif 'url' in info_dict:
2751             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2752
2753         if (self.params.get('forcejson')
2754                 or self.params['forceprint'].get('video')
2755                 or self.params['print_to_file'].get('video')):
2756             self.post_extract(info_dict)
2757         self._forceprint('video', info_dict)
2758
2759         print_mandatory('title')
2760         print_mandatory('id')
2761         print_mandatory('url', 'urls')
2762         print_optional('thumbnail')
2763         print_optional('description')
2764         print_optional('filename')
2765         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2766             self.to_stdout(formatSeconds(info_dict['duration']))
2767         print_mandatory('format')
2768
2769         if self.params.get('forcejson'):
2770             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2771
2772     def dl(self, name, info, subtitle=False, test=False):
2773         if not info.get('url'):
2774             self.raise_no_formats(info, True)
2775
2776         if test:
2777             verbose = self.params.get('verbose')
2778             params = {
2779                 'test': True,
2780                 'quiet': self.params.get('quiet') or not verbose,
2781                 'verbose': verbose,
2782                 'noprogress': not verbose,
2783                 'nopart': True,
2784                 'skip_unavailable_fragments': False,
2785                 'keep_fragments': False,
2786                 'overwrites': True,
2787                 '_no_ytdl_file': True,
2788             }
2789         else:
2790             params = self.params
2791         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2792         if not test:
2793             for ph in self._progress_hooks:
2794                 fd.add_progress_hook(ph)
2795             urls = '", "'.join(
2796                 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2797                 for f in info.get('requested_formats', []) or [info])
2798             self.write_debug('Invoking downloader on "%s"' % urls)
2799
2800         # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
2801         # But it may contain objects that are not deep-copyable
2802         new_info = self._copy_infodict(info)
2803         if new_info.get('http_headers') is None:
2804             new_info['http_headers'] = self._calc_headers(new_info)
2805         return fd.download(name, new_info, subtitle)
2806
2807     def existing_file(self, filepaths, *, default_overwrite=True):
2808         existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2809         if existing_files and not self.params.get('overwrites', default_overwrite):
2810             return existing_files[0]
2811
2812         for file in existing_files:
2813             self.report_file_delete(file)
2814             os.remove(file)
2815         return None
2816
2817     def process_info(self, info_dict):
2818         """Process a single resolved IE result. (Modified it in-place)"""
2819
2820         assert info_dict.get('_type', 'video') == 'video'
2821         original_infodict = info_dict
2822
2823         if 'format' not in info_dict and 'ext' in info_dict:
2824             info_dict['format'] = info_dict['ext']
2825
2826         if self._match_entry(info_dict) is not None:
2827             info_dict['__write_download_archive'] = 'ignore'
2828             return
2829
2830         self.post_extract(info_dict)
2831         self._num_downloads += 1
2832
2833         # info_dict['_filename'] needs to be set for backward compatibility
2834         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2835         temp_filename = self.prepare_filename(info_dict, 'temp')
2836         files_to_move = {}
2837
2838         # Forced printings
2839         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2840
2841         if self.params.get('simulate'):
2842             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2843             return
2844
2845         if full_filename is None:
2846             return
2847         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2848             return
2849         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2850             return
2851
2852         if self._write_description('video', info_dict,
2853                                    self.prepare_filename(info_dict, 'description')) is None:
2854             return
2855
2856         sub_files = self._write_subtitles(info_dict, temp_filename)
2857         if sub_files is None:
2858             return
2859         files_to_move.update(dict(sub_files))
2860
2861         thumb_files = self._write_thumbnails(
2862             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2863         if thumb_files is None:
2864             return
2865         files_to_move.update(dict(thumb_files))
2866
2867         infofn = self.prepare_filename(info_dict, 'infojson')
2868         _infojson_written = self._write_info_json('video', info_dict, infofn)
2869         if _infojson_written:
2870             info_dict['infojson_filename'] = infofn
2871             # For backward compatibility, even though it was a private field
2872             info_dict['__infojson_filename'] = infofn
2873         elif _infojson_written is None:
2874             return
2875
2876         # Note: Annotations are deprecated
2877         annofn = None
2878         if self.params.get('writeannotations', False):
2879             annofn = self.prepare_filename(info_dict, 'annotation')
2880         if annofn:
2881             if not self._ensure_dir_exists(encodeFilename(annofn)):
2882                 return
2883             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2884                 self.to_screen('[info] Video annotations are already present')
2885             elif not info_dict.get('annotations'):
2886                 self.report_warning('There are no annotations to write.')
2887             else:
2888                 try:
2889                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2890                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2891                         annofile.write(info_dict['annotations'])
2892                 except (KeyError, TypeError):
2893                     self.report_warning('There are no annotations to write.')
2894                 except (OSError, IOError):
2895                     self.report_error('Cannot write annotations file: ' + annofn)
2896                     return
2897
2898         # Write internet shortcut files
2899         def _write_link_file(link_type):
2900             if 'webpage_url' not in info_dict:
2901                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2902                 return False
2903             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2904             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2905                 return False
2906             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2907                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2908                 return True
2909             try:
2910                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2911                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2912                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2913                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2914                     if link_type == 'desktop':
2915                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2916                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2917             except (OSError, IOError):
2918                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2919                 return False
2920             return True
2921
2922         write_links = {
2923             'url': self.params.get('writeurllink'),
2924             'webloc': self.params.get('writewebloclink'),
2925             'desktop': self.params.get('writedesktoplink'),
2926         }
2927         if self.params.get('writelink'):
2928             link_type = ('webloc' if sys.platform == 'darwin'
2929                          else 'desktop' if sys.platform.startswith('linux')
2930                          else 'url')
2931             write_links[link_type] = True
2932
2933         if any(should_write and not _write_link_file(link_type)
2934                for link_type, should_write in write_links.items()):
2935             return
2936
2937         def replace_info_dict(new_info):
2938             nonlocal info_dict
2939             if new_info == info_dict:
2940                 return
2941             info_dict.clear()
2942             info_dict.update(new_info)
2943
2944         try:
2945             new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2946             replace_info_dict(new_info)
2947         except PostProcessingError as err:
2948             self.report_error('Preprocessing: %s' % str(err))
2949             return
2950
2951         if self.params.get('skip_download'):
2952             info_dict['filepath'] = temp_filename
2953             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2954             info_dict['__files_to_move'] = files_to_move
2955             replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2956             info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2957         else:
2958             # Download
2959             info_dict.setdefault('__postprocessors', [])
2960             try:
2961
2962                 def existing_video_file(*filepaths):
2963                     ext = info_dict.get('ext')
2964                     converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
2965                     file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
2966                                               default_overwrite=False)
2967                     if file:
2968                         info_dict['ext'] = os.path.splitext(file)[1][1:]
2969                     return file
2970
2971                 success = True
2972                 if info_dict.get('requested_formats') is not None:
2973
2974                     def compatible_formats(formats):
2975                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2976                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2977                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2978                         if len(video_formats) > 2 or len(audio_formats) > 2:
2979                             return False
2980
2981                         # Check extension
2982                         exts = set(format.get('ext') for format in formats)
2983                         COMPATIBLE_EXTS = (
2984                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2985                             set(('webm',)),
2986                         )
2987                         for ext_sets in COMPATIBLE_EXTS:
2988                             if ext_sets.issuperset(exts):
2989                                 return True
2990                         # TODO: Check acodec/vcodec
2991                         return False
2992
2993                     requested_formats = info_dict['requested_formats']
2994                     old_ext = info_dict['ext']
2995                     if self.params.get('merge_output_format') is None:
2996                         if not compatible_formats(requested_formats):
2997                             info_dict['ext'] = 'mkv'
2998                             self.report_warning(
2999                                 'Requested formats are incompatible for merge and will be merged into mkv')
3000                         if (info_dict['ext'] == 'webm'
3001                                 and info_dict.get('thumbnails')
3002                                 # check with type instead of pp_key, __name__, or isinstance
3003                                 # since we dont want any custom PPs to trigger this
3004                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3005                             info_dict['ext'] = 'mkv'
3006                             self.report_warning(
3007                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3008                     new_ext = info_dict['ext']
3009
3010                     def correct_ext(filename, ext=new_ext):
3011                         if filename == '-':
3012                             return filename
3013                         filename_real_ext = os.path.splitext(filename)[1][1:]
3014                         filename_wo_ext = (
3015                             os.path.splitext(filename)[0]
3016                             if filename_real_ext in (old_ext, new_ext)
3017                             else filename)
3018                         return '%s.%s' % (filename_wo_ext, ext)
3019
3020                     # Ensure filename always has a correct extension for successful merge
3021                     full_filename = correct_ext(full_filename)
3022                     temp_filename = correct_ext(temp_filename)
3023                     dl_filename = existing_video_file(full_filename, temp_filename)
3024                     info_dict['__real_download'] = False
3025
3026                     downloaded = []
3027                     merger = FFmpegMergerPP(self)
3028
3029                     fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3030                     if dl_filename is not None:
3031                         self.report_file_already_downloaded(dl_filename)
3032                     elif fd:
3033                         for f in requested_formats if fd != FFmpegFD else []:
3034                             f['filepath'] = fname = prepend_extension(
3035                                 correct_ext(temp_filename, info_dict['ext']),
3036                                 'f%s' % f['format_id'], info_dict['ext'])
3037                             downloaded.append(fname)
3038                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3039                         success, real_download = self.dl(temp_filename, info_dict)
3040                         info_dict['__real_download'] = real_download
3041                     else:
3042                         if self.params.get('allow_unplayable_formats'):
3043                             self.report_warning(
3044                                 'You have requested merging of multiple formats '
3045                                 'while also allowing unplayable formats to be downloaded. '
3046                                 'The formats won\'t be merged to prevent data corruption.')
3047                         elif not merger.available:
3048                             self.report_warning(
3049                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
3050                                 'The formats won\'t be merged.')
3051
3052                         if temp_filename == '-':
3053                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3054                                       else 'but the formats are incompatible for simultaneous download' if merger.available
3055                                       else 'but ffmpeg is not installed')
3056                             self.report_warning(
3057                                 f'You have requested downloading multiple formats to stdout {reason}. '
3058                                 'The formats will be streamed one after the other')
3059                             fname = temp_filename
3060                         for f in requested_formats:
3061                             new_info = dict(info_dict)
3062                             del new_info['requested_formats']
3063                             new_info.update(f)
3064                             if temp_filename != '-':
3065                                 fname = prepend_extension(
3066                                     correct_ext(temp_filename, new_info['ext']),
3067                                     'f%s' % f['format_id'], new_info['ext'])
3068                                 if not self._ensure_dir_exists(fname):
3069                                     return
3070                                 f['filepath'] = fname
3071                                 downloaded.append(fname)
3072                             partial_success, real_download = self.dl(fname, new_info)
3073                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
3074                             success = success and partial_success
3075
3076                     if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3077                         info_dict['__postprocessors'].append(merger)
3078                         info_dict['__files_to_merge'] = downloaded
3079                         # Even if there were no downloads, it is being merged only now
3080                         info_dict['__real_download'] = True
3081                     else:
3082                         for file in downloaded:
3083                             files_to_move[file] = None
3084                 else:
3085                     # Just a single file
3086                     dl_filename = existing_video_file(full_filename, temp_filename)
3087                     if dl_filename is None or dl_filename == temp_filename:
3088                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3089                         # So we should try to resume the download
3090                         success, real_download = self.dl(temp_filename, info_dict)
3091                         info_dict['__real_download'] = real_download
3092                     else:
3093                         self.report_file_already_downloaded(dl_filename)
3094
3095                 dl_filename = dl_filename or temp_filename
3096                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3097
3098             except network_exceptions as err:
3099                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3100                 return
3101             except (OSError, IOError) as err:
3102                 raise UnavailableVideoError(err)
3103             except (ContentTooShortError, ) as err:
3104                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3105                 return
3106
3107             if success and full_filename != '-':
3108
3109                 def fixup():
3110                     do_fixup = True
3111                     fixup_policy = self.params.get('fixup')
3112                     vid = info_dict['id']
3113
3114                     if fixup_policy in ('ignore', 'never'):
3115                         return
3116                     elif fixup_policy == 'warn':
3117                         do_fixup = False
3118                     elif fixup_policy != 'force':
3119                         assert fixup_policy in ('detect_or_warn', None)
3120                         if not info_dict.get('__real_download'):
3121                             do_fixup = False
3122
3123                     def ffmpeg_fixup(cndn, msg, cls):
3124                         if not cndn:
3125                             return
3126                         if not do_fixup:
3127                             self.report_warning(f'{vid}: {msg}')
3128                             return
3129                         pp = cls(self)
3130                         if pp.available:
3131                             info_dict['__postprocessors'].append(pp)
3132                         else:
3133                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3134
3135                     stretched_ratio = info_dict.get('stretched_ratio')
3136                     ffmpeg_fixup(
3137                         stretched_ratio not in (1, None),
3138                         f'Non-uniform pixel ratio {stretched_ratio}',
3139                         FFmpegFixupStretchedPP)
3140
3141                     ffmpeg_fixup(
3142                         (info_dict.get('requested_formats') is None
3143                          and info_dict.get('container') == 'm4a_dash'
3144                          and info_dict.get('ext') == 'm4a'),
3145                         'writing DASH m4a. Only some players support this container',
3146                         FFmpegFixupM4aPP)
3147
3148                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3149                     downloader = downloader.__name__ if downloader else None
3150
3151                     if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
3152                         ffmpeg_fixup(downloader == 'HlsFD',
3153                                      'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3154                                      FFmpegFixupM3u8PP)
3155                         ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3156                                      'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3157
3158                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3159                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3160
3161                 fixup()
3162                 try:
3163                     replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3164                 except PostProcessingError as err:
3165                     self.report_error('Postprocessing: %s' % str(err))
3166                     return
3167                 try:
3168                     for ph in self._post_hooks:
3169                         ph(info_dict['filepath'])
3170                 except Exception as err:
3171                     self.report_error('post hooks: %s' % str(err))
3172                     return
3173                 info_dict['__write_download_archive'] = True
3174
3175         if self.params.get('force_write_download_archive'):
3176             info_dict['__write_download_archive'] = True
3177
3178         # Make sure the info_dict was modified in-place
3179         assert info_dict is original_infodict
3180
3181         max_downloads = self.params.get('max_downloads')
3182         if max_downloads is not None and self._num_downloads >= int(max_downloads):
3183             raise MaxDownloadsReached()
3184
3185     def __download_wrapper(self, func):
3186         @functools.wraps(func)
3187         def wrapper(*args, **kwargs):
3188             try:
3189                 res = func(*args, **kwargs)
3190             except UnavailableVideoError as e:
3191                 self.report_error(e)
3192             except MaxDownloadsReached as e:
3193                 self.to_screen(f'[info] {e}')
3194                 raise
3195             except DownloadCancelled as e:
3196                 self.to_screen(f'[info] {e}')
3197                 if not self.params.get('break_per_url'):
3198                     raise
3199             else:
3200                 if self.params.get('dump_single_json', False):
3201                     self.post_extract(res)
3202                     self.to_stdout(json.dumps(self.sanitize_info(res)))
3203         return wrapper
3204
3205     def download(self, url_list):
3206         """Download a given list of URLs."""
3207         url_list = variadic(url_list)  # Passing a single URL is a common mistake
3208         outtmpl = self.outtmpl_dict['default']
3209         if (len(url_list) > 1
3210                 and outtmpl != '-'
3211                 and '%' not in outtmpl
3212                 and self.params.get('max_downloads') != 1):
3213             raise SameFileError(outtmpl)
3214
3215         for url in url_list:
3216             self.__download_wrapper(self.extract_info)(
3217                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3218
3219         return self._download_retcode
3220
3221     def download_with_info_file(self, info_filename):
3222         with contextlib.closing(fileinput.FileInput(
3223                 [info_filename], mode='r',
3224                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3225             # FileInput doesn't have a read method, we can't call json.load
3226             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3227         try:
3228             self.__download_wrapper(self.process_ie_result)(info, download=True)
3229         except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3230             if not isinstance(e, EntryNotInPlaylist):
3231                 self.to_stderr('\r')
3232             webpage_url = info.get('webpage_url')
3233             if webpage_url is not None:
3234                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3235                 return self.download([webpage_url])
3236             else:
3237                 raise
3238         return self._download_retcode
3239
3240     @staticmethod
3241     def sanitize_info(info_dict, remove_private_keys=False):
3242         ''' Sanitize the infodict for converting to json '''
3243         if info_dict is None:
3244             return info_dict
3245         info_dict.setdefault('epoch', int(time.time()))
3246         info_dict.setdefault('_type', 'video')
3247         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3248         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3249         if remove_private_keys:
3250             remove_keys |= {
3251                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3252                 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3253             }
3254             reject = lambda k, v: k not in keep_keys and (
3255                 k.startswith('_') or k in remove_keys or v is None)
3256         else:
3257             reject = lambda k, v: k in remove_keys
3258
3259         def filter_fn(obj):
3260             if isinstance(obj, dict):
3261                 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3262             elif isinstance(obj, (list, tuple, set, LazyList)):
3263                 return list(map(filter_fn, obj))
3264             elif obj is None or isinstance(obj, (str, int, float, bool)):
3265                 return obj
3266             else:
3267                 return repr(obj)
3268
3269         return filter_fn(info_dict)
3270
3271     @staticmethod
3272     def filter_requested_info(info_dict, actually_filter=True):
3273         ''' Alias of sanitize_info for backward compatibility '''
3274         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3275
3276     @staticmethod
3277     def post_extract(info_dict):
3278         def actual_post_extract(info_dict):
3279             if info_dict.get('_type') in ('playlist', 'multi_video'):
3280                 for video_dict in info_dict.get('entries', {}):
3281                     actual_post_extract(video_dict or {})
3282                 return
3283
3284             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3285             extra = post_extractor().items()
3286             info_dict.update(extra)
3287             info_dict.pop('__post_extractor', None)
3288
3289             original_infodict = info_dict.get('__original_infodict') or {}
3290             original_infodict.update(extra)
3291             original_infodict.pop('__post_extractor', None)
3292
3293         actual_post_extract(info_dict or {})
3294
3295     def run_pp(self, pp, infodict):
3296         files_to_delete = []
3297         if '__files_to_move' not in infodict:
3298             infodict['__files_to_move'] = {}
3299         try:
3300             files_to_delete, infodict = pp.run(infodict)
3301         except PostProcessingError as e:
3302             # Must be True and not 'only_download'
3303             if self.params.get('ignoreerrors') is True:
3304                 self.report_error(e)
3305                 return infodict
3306             raise
3307
3308         if not files_to_delete:
3309             return infodict
3310         if self.params.get('keepvideo', False):
3311             for f in files_to_delete:
3312                 infodict['__files_to_move'].setdefault(f, '')
3313         else:
3314             for old_filename in set(files_to_delete):
3315                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3316                 try:
3317                     os.remove(encodeFilename(old_filename))
3318                 except (IOError, OSError):
3319                     self.report_warning('Unable to remove downloaded original file')
3320                 if old_filename in infodict['__files_to_move']:
3321                     del infodict['__files_to_move'][old_filename]
3322         return infodict
3323
3324     def run_all_pps(self, key, info, *, additional_pps=None):
3325         self._forceprint(key, info)
3326         for pp in (additional_pps or []) + self._pps[key]:
3327             info = self.run_pp(pp, info)
3328         return info
3329
3330     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3331         info = dict(ie_info)
3332         info['__files_to_move'] = files_to_move or {}
3333         info = self.run_all_pps(key, info)
3334         return info, info.pop('__files_to_move', None)
3335
3336     def post_process(self, filename, info, files_to_move=None):
3337         """Run all the postprocessors on the given file."""
3338         info['filepath'] = filename
3339         info['__files_to_move'] = files_to_move or {}
3340         info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3341         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3342         del info['__files_to_move']
3343         return self.run_all_pps('after_move', info)
3344
3345     def _make_archive_id(self, info_dict):
3346         video_id = info_dict.get('id')
3347         if not video_id:
3348             return
3349         # Future-proof against any change in case
3350         # and backwards compatibility with prior versions
3351         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3352         if extractor is None:
3353             url = str_or_none(info_dict.get('url'))
3354             if not url:
3355                 return
3356             # Try to find matching extractor for the URL and take its ie_key
3357             for ie_key, ie in self._ies.items():
3358                 if ie.suitable(url):
3359                     extractor = ie_key
3360                     break
3361             else:
3362                 return
3363         return '%s %s' % (extractor.lower(), video_id)
3364
3365     def in_download_archive(self, info_dict):
3366         fn = self.params.get('download_archive')
3367         if fn is None:
3368             return False
3369
3370         vid_id = self._make_archive_id(info_dict)
3371         if not vid_id:
3372             return False  # Incomplete video information
3373
3374         return vid_id in self.archive
3375
3376     def record_download_archive(self, info_dict):
3377         fn = self.params.get('download_archive')
3378         if fn is None:
3379             return
3380         vid_id = self._make_archive_id(info_dict)
3381         assert vid_id
3382         self.write_debug(f'Adding to archive: {vid_id}')
3383         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3384             archive_file.write(vid_id + '\n')
3385         self.archive.add(vid_id)
3386
3387     @staticmethod
3388     def format_resolution(format, default='unknown'):
3389         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3390             return 'audio only'
3391         if format.get('resolution') is not None:
3392             return format['resolution']
3393         if format.get('width') and format.get('height'):
3394             return '%dx%d' % (format['width'], format['height'])
3395         elif format.get('height'):
3396             return '%sp' % format['height']
3397         elif format.get('width'):
3398             return '%dx?' % format['width']
3399         return default
3400
3401     def _list_format_headers(self, *headers):
3402         if self.params.get('listformats_table', True) is not False:
3403             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3404         return headers
3405
3406     def _format_note(self, fdict):
3407         res = ''
3408         if fdict.get('ext') in ['f4f', 'f4m']:
3409             res += '(unsupported)'
3410         if fdict.get('language'):
3411             if res:
3412                 res += ' '
3413             res += '[%s]' % fdict['language']
3414         if fdict.get('format_note') is not None:
3415             if res:
3416                 res += ' '
3417             res += fdict['format_note']
3418         if fdict.get('tbr') is not None:
3419             if res:
3420                 res += ', '
3421             res += '%4dk' % fdict['tbr']
3422         if fdict.get('container') is not None:
3423             if res:
3424                 res += ', '
3425             res += '%s container' % fdict['container']
3426         if (fdict.get('vcodec') is not None
3427                 and fdict.get('vcodec') != 'none'):
3428             if res:
3429                 res += ', '
3430             res += fdict['vcodec']
3431             if fdict.get('vbr') is not None:
3432                 res += '@'
3433         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3434             res += 'video@'
3435         if fdict.get('vbr') is not None:
3436             res += '%4dk' % fdict['vbr']
3437         if fdict.get('fps') is not None:
3438             if res:
3439                 res += ', '
3440             res += '%sfps' % fdict['fps']
3441         if fdict.get('acodec') is not None:
3442             if res:
3443                 res += ', '
3444             if fdict['acodec'] == 'none':
3445                 res += 'video only'
3446             else:
3447                 res += '%-5s' % fdict['acodec']
3448         elif fdict.get('abr') is not None:
3449             if res:
3450                 res += ', '
3451             res += 'audio'
3452         if fdict.get('abr') is not None:
3453             res += '@%3dk' % fdict['abr']
3454         if fdict.get('asr') is not None:
3455             res += ' (%5dHz)' % fdict['asr']
3456         if fdict.get('filesize') is not None:
3457             if res:
3458                 res += ', '
3459             res += format_bytes(fdict['filesize'])
3460         elif fdict.get('filesize_approx') is not None:
3461             if res:
3462                 res += ', '
3463             res += '~' + format_bytes(fdict['filesize_approx'])
3464         return res
3465
3466     def render_formats_table(self, info_dict):
3467         if not info_dict.get('formats') and not info_dict.get('url'):
3468             return None
3469
3470         formats = info_dict.get('formats', [info_dict])
3471         if not self.params.get('listformats_table', True) is not False:
3472             table = [
3473                 [
3474                     format_field(f, 'format_id'),
3475                     format_field(f, 'ext'),
3476                     self.format_resolution(f),
3477                     self._format_note(f)
3478                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3479             return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3480
3481         delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3482         table = [
3483             [
3484                 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3485                 format_field(f, 'ext'),
3486                 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3487                 format_field(f, 'fps', '\t%d'),
3488                 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3489                 delim,
3490                 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3491                 format_field(f, 'tbr', '\t%dk'),
3492                 shorten_protocol_name(f.get('protocol', '')),
3493                 delim,
3494                 format_field(f, 'vcodec', default='unknown').replace(
3495                     'none', 'images' if f.get('acodec') == 'none'
3496                             else self._format_screen('audio only', self.Styles.SUPPRESS)),
3497                 format_field(f, 'vbr', '\t%dk'),
3498                 format_field(f, 'acodec', default='unknown').replace(
3499                     'none', '' if f.get('vcodec') == 'none'
3500                             else self._format_screen('video only', self.Styles.SUPPRESS)),
3501                 format_field(f, 'abr', '\t%dk'),
3502                 format_field(f, 'asr', '\t%dHz'),
3503                 join_nonempty(
3504                     self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3505                     format_field(f, 'language', '[%s]'),
3506                     join_nonempty(format_field(f, 'format_note'),
3507                                   format_field(f, 'container', ignore=(None, f.get('ext'))),
3508                                   delim=', '),
3509                     delim=' '),
3510             ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3511         header_line = self._list_format_headers(
3512             'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3513             delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3514
3515         return render_table(
3516             header_line, table, hide_empty=True,
3517             delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3518
3519     def render_thumbnails_table(self, info_dict):
3520         thumbnails = list(info_dict.get('thumbnails') or [])
3521         if not thumbnails:
3522             return None
3523         return render_table(
3524             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3525             [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3526
3527     def render_subtitles_table(self, video_id, subtitles):
3528         def _row(lang, formats):
3529             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3530             if len(set(names)) == 1:
3531                 names = [] if names[0] == 'unknown' else names[:1]
3532             return [lang, ', '.join(names), ', '.join(exts)]
3533
3534         if not subtitles:
3535             return None
3536         return render_table(
3537             self._list_format_headers('Language', 'Name', 'Formats'),
3538             [_row(lang, formats) for lang, formats in subtitles.items()],
3539             hide_empty=True)
3540
3541     def __list_table(self, video_id, name, func, *args):
3542         table = func(*args)
3543         if not table:
3544             self.to_screen(f'{video_id} has no {name}')
3545             return
3546         self.to_screen(f'[info] Available {name} for {video_id}:')
3547         self.to_stdout(table)
3548
3549     def list_formats(self, info_dict):
3550         self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3551
3552     def list_thumbnails(self, info_dict):
3553         self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3554
3555     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3556         self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3557
3558     def urlopen(self, req):
3559         """ Start an HTTP download """
3560         if isinstance(req, compat_basestring):
3561             req = sanitized_Request(req)
3562         return self._opener.open(req, timeout=self._socket_timeout)
3563
3564     def print_debug_header(self):
3565         if not self.params.get('verbose'):
3566             return
3567
3568         def get_encoding(stream):
3569             ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3570             if not supports_terminal_sequences(stream):
3571                 from .compat import WINDOWS_VT_MODE
3572                 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3573             return ret
3574
3575         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3576             locale.getpreferredencoding(),
3577             sys.getfilesystemencoding(),
3578             get_encoding(self._screen_file), get_encoding(self._err_file),
3579             self.get_encoding())
3580
3581         logger = self.params.get('logger')
3582         if logger:
3583             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3584             write_debug(encoding_str)
3585         else:
3586             write_string(f'[debug] {encoding_str}\n', encoding=None)
3587             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3588
3589         source = detect_variant()
3590         write_debug(join_nonempty(
3591             'yt-dlp version', __version__,
3592             f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3593             '' if source == 'unknown' else f'({source})',
3594             delim=' '))
3595         if not _LAZY_LOADER:
3596             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3597                 write_debug('Lazy loading extractors is forcibly disabled')
3598             else:
3599                 write_debug('Lazy loading extractors is disabled')
3600         if plugin_extractors or plugin_postprocessors:
3601             write_debug('Plugins: %s' % [
3602                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3603                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3604         if self.params.get('compat_opts'):
3605             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3606
3607         if source == 'source':
3608             try:
3609                 sp = Popen(
3610                     ['git', 'rev-parse', '--short', 'HEAD'],
3611                     stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3612                     cwd=os.path.dirname(os.path.abspath(__file__)))
3613                 out, err = sp.communicate_or_kill()
3614                 out = out.decode().strip()
3615                 if re.match('[0-9a-f]+', out):
3616                     write_debug('Git HEAD: %s' % out)
3617             except Exception:
3618                 try:
3619                     sys.exc_clear()
3620                 except Exception:
3621                     pass
3622
3623         def python_implementation():
3624             impl_name = platform.python_implementation()
3625             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3626                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3627             return impl_name
3628
3629         write_debug('Python version %s (%s %s) - %s' % (
3630             platform.python_version(),
3631             python_implementation(),
3632             platform.architecture()[0],
3633             platform_name()))
3634
3635         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3636         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3637         if ffmpeg_features:
3638             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3639
3640         exe_versions['rtmpdump'] = rtmpdump_version()
3641         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3642         exe_str = ', '.join(
3643             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3644         ) or 'none'
3645         write_debug('exe versions: %s' % exe_str)
3646
3647         from .downloader.websocket import has_websockets
3648         from .postprocessor.embedthumbnail import has_mutagen
3649         from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
3650
3651         lib_str = join_nonempty(
3652             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3653             SECRETSTORAGE_AVAILABLE and 'secretstorage',
3654             has_mutagen and 'mutagen',
3655             SQLITE_AVAILABLE and 'sqlite',
3656             has_websockets and 'websockets',
3657             delim=', ') or 'none'
3658         write_debug('Optional libraries: %s' % lib_str)
3659
3660         proxy_map = {}
3661         for handler in self._opener.handlers:
3662             if hasattr(handler, 'proxies'):
3663                 proxy_map.update(handler.proxies)
3664         write_debug(f'Proxy map: {proxy_map}')
3665
3666         # Not implemented
3667         if False and self.params.get('call_home'):
3668             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3669             write_debug('Public IP address: %s' % ipaddr)
3670             latest_version = self.urlopen(
3671                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3672             if version_tuple(latest_version) > version_tuple(__version__):
3673                 self.report_warning(
3674                     'You are using an outdated version (newest version: %s)! '
3675                     'See https://yt-dl.org/update if you need help updating.' %
3676                     latest_version)
3677
3678     def _setup_opener(self):
3679         timeout_val = self.params.get('socket_timeout')
3680         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3681
3682         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3683         opts_cookiefile = self.params.get('cookiefile')
3684         opts_proxy = self.params.get('proxy')
3685
3686         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3687
3688         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3689         if opts_proxy is not None:
3690             if opts_proxy == '':
3691                 proxies = {}
3692             else:
3693                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3694         else:
3695             proxies = compat_urllib_request.getproxies()
3696             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3697             if 'http' in proxies and 'https' not in proxies:
3698                 proxies['https'] = proxies['http']
3699         proxy_handler = PerRequestProxyHandler(proxies)
3700
3701         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3702         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3703         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3704         redirect_handler = YoutubeDLRedirectHandler()
3705         data_handler = compat_urllib_request_DataHandler()
3706
3707         # When passing our own FileHandler instance, build_opener won't add the
3708         # default FileHandler and allows us to disable the file protocol, which
3709         # can be used for malicious purposes (see
3710         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3711         file_handler = compat_urllib_request.FileHandler()
3712
3713         def file_open(*args, **kwargs):
3714             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3715         file_handler.file_open = file_open
3716
3717         opener = compat_urllib_request.build_opener(
3718             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3719
3720         # Delete the default user-agent header, which would otherwise apply in
3721         # cases where our custom HTTP handler doesn't come into play
3722         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3723         opener.addheaders = []
3724         self._opener = opener
3725
3726     def encode(self, s):
3727         if isinstance(s, bytes):
3728             return s  # Already encoded
3729
3730         try:
3731             return s.encode(self.get_encoding())
3732         except UnicodeEncodeError as err:
3733             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3734             raise
3735
3736     def get_encoding(self):
3737         encoding = self.params.get('encoding')
3738         if encoding is None:
3739             encoding = preferredencoding()
3740         return encoding
3741
3742     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3743         ''' Write infojson and returns True = written, False = skip, None = error '''
3744         if overwrite is None:
3745             overwrite = self.params.get('overwrites', True)
3746         if not self.params.get('writeinfojson'):
3747             return False
3748         elif not infofn:
3749             self.write_debug(f'Skipping writing {label} infojson')
3750             return False
3751         elif not self._ensure_dir_exists(infofn):
3752             return None
3753         elif not overwrite and os.path.exists(infofn):
3754             self.to_screen(f'[info] {label.title()} metadata is already present')
3755         else:
3756             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3757             try:
3758                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3759             except (OSError, IOError):
3760                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3761                 return None
3762         return True
3763
3764     def _write_description(self, label, ie_result, descfn):
3765         ''' Write description and returns True = written, False = skip, None = error '''
3766         if not self.params.get('writedescription'):
3767             return False
3768         elif not descfn:
3769             self.write_debug(f'Skipping writing {label} description')
3770             return False
3771         elif not self._ensure_dir_exists(descfn):
3772             return None
3773         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3774             self.to_screen(f'[info] {label.title()} description is already present')
3775         elif ie_result.get('description') is None:
3776             self.report_warning(f'There\'s no {label} description to write')
3777             return False
3778         else:
3779             try:
3780                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3781                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3782                     descfile.write(ie_result['description'])
3783             except (OSError, IOError):
3784                 self.report_error(f'Cannot write {label} description file {descfn}')
3785                 return None
3786         return True
3787
3788     def _write_subtitles(self, info_dict, filename):
3789         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3790         ret = []
3791         subtitles = info_dict.get('requested_subtitles')
3792         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3793             # subtitles download errors are already managed as troubles in relevant IE
3794             # that way it will silently go on when used with unsupporting IE
3795             return ret
3796
3797         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3798         if not sub_filename_base:
3799             self.to_screen('[info] Skipping writing video subtitles')
3800             return ret
3801         for sub_lang, sub_info in subtitles.items():
3802             sub_format = sub_info['ext']
3803             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3804             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3805             existing_sub = self.existing_file((sub_filename_final, sub_filename))
3806             if existing_sub:
3807                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3808                 sub_info['filepath'] = existing_sub
3809                 ret.append((existing_sub, sub_filename_final))
3810                 continue
3811
3812             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3813             if sub_info.get('data') is not None:
3814                 try:
3815                     # Use newline='' to prevent conversion of newline characters
3816                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3817                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3818                         subfile.write(sub_info['data'])
3819                     sub_info['filepath'] = sub_filename
3820                     ret.append((sub_filename, sub_filename_final))
3821                     continue
3822                 except (OSError, IOError):
3823                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3824                     return None
3825
3826             try:
3827                 sub_copy = sub_info.copy()
3828                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3829                 self.dl(sub_filename, sub_copy, subtitle=True)
3830                 sub_info['filepath'] = sub_filename
3831                 ret.append((sub_filename, sub_filename_final))
3832             except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3833                 if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
3834                     raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
3835                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3836         return ret
3837
3838     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3839         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3840         write_all = self.params.get('write_all_thumbnails', False)
3841         thumbnails, ret = [], []
3842         if write_all or self.params.get('writethumbnail', False):
3843             thumbnails = info_dict.get('thumbnails') or []
3844         multiple = write_all and len(thumbnails) > 1
3845
3846         if thumb_filename_base is None:
3847             thumb_filename_base = filename
3848         if thumbnails and not thumb_filename_base:
3849             self.write_debug(f'Skipping writing {label} thumbnail')
3850             return ret
3851
3852         for idx, t in list(enumerate(thumbnails))[::-1]:
3853             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3854             thumb_display_id = f'{label} thumbnail {t["id"]}'
3855             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3856             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3857
3858             existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3859             if existing_thumb:
3860                 self.to_screen('[info] %s is already present' % (
3861                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3862                 t['filepath'] = existing_thumb
3863                 ret.append((existing_thumb, thumb_filename_final))
3864             else:
3865                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3866                 try:
3867                     uf = self.urlopen(t['url'])
3868                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3869                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3870                         shutil.copyfileobj(uf, thumbf)
3871                     ret.append((thumb_filename, thumb_filename_final))
3872                     t['filepath'] = thumb_filename
3873                 except network_exceptions as err:
3874                     thumbnails.pop(idx)
3875                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3876             if ret and not write_all:
3877                 break
3878         return ret