yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import functools
  13 import io
  14 import itertools
  15 import json
  16 import locale
  17 import operator
  18 import os
  19 import platform
  20 import re
  21 import shutil
  22 import subprocess
  23 import sys
  24 import tempfile
  25 import time
  26 import tokenize
  27 import traceback
  28 import random
  29 import unicodedata
  30
  31 from enum import Enum
  32 from string import ascii_letters
  33
  34 from .compat import (
  35     compat_basestring,
  36     compat_get_terminal_size,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_pycrypto_AES,
  41     compat_shlex_quote,
  42     compat_str,
  43     compat_tokenize_tokenize,
  44     compat_urllib_error,
  45     compat_urllib_request,
  46     compat_urllib_request_DataHandler,
  47     windows_enable_vt_mode,
  48 )
  49 from .cookies import load_cookies
  50 from .utils import (
  51     age_restricted,
  52     args_to_str,
  53     ContentTooShortError,
  54     date_from_str,
  55     DateRange,
  56     DEFAULT_OUTTMPL,
  57     determine_ext,
  58     determine_protocol,
  59     DownloadCancelled,
  60     DownloadError,
  61     encode_compat_str,
  62     encodeFilename,
  63     EntryNotInPlaylist,
  64     error_to_compat_str,
  65     ExistingVideoReached,
  66     expand_path,
  67     ExtractorError,
  68     float_or_none,
  69     format_bytes,
  70     format_field,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     HEADRequest,
  74     int_or_none,
  75     iri_to_uri,
  76     ISO3166Utils,
  77     join_nonempty,
  78     LazyList,
  79     LINK_TEMPLATES,
  80     locked_file,
  81     make_dir,
  82     make_HTTPS_handler,
  83     MaxDownloadsReached,
  84     network_exceptions,
  85     number_of_digits,
  86     orderedSet,
  87     OUTTMPL_TYPES,
  88     PagedList,
  89     parse_filesize,
  90     PerRequestProxyHandler,
  91     platform_name,
  92     Popen,
  93     PostProcessingError,
  94     preferredencoding,
  95     prepend_extension,
  96     register_socks_protocols,
  97     RejectedVideoReached,
  98     render_table,
  99     replace_extension,
 100     SameFileError,
 101     sanitize_filename,
 102     sanitize_path,
 103     sanitize_url,
 104     sanitized_Request,
 105     std_headers,
 106     STR_FORMAT_RE_TMPL,
 107     STR_FORMAT_TYPES,
 108     str_or_none,
 109     strftime_or_none,
 110     subtitles_filename,
 111     supports_terminal_sequences,
 112     ThrottledDownload,
 113     to_high_limit_path,
 114     traverse_obj,
 115     try_get,
 116     UnavailableVideoError,
 117     url_basename,
 118     variadic,
 119     version_tuple,
 120     write_json_file,
 121     write_string,
 122     YoutubeDLCookieProcessor,
 123     YoutubeDLHandler,
 124     YoutubeDLRedirectHandler,
 125 )
 126 from .cache import Cache
 127 from .minicurses import format_text
 128 from .extractor import (
 129     gen_extractor_classes,
 130     get_info_extractor,
 131     _LAZY_LOADER,
 132     _PLUGIN_CLASSES as plugin_extractors
 133 )
 134 from .extractor.openload import PhantomJSwrapper
 135 from .downloader import (
 136     FFmpegFD,
 137     get_suitable_downloader,
 138     shorten_protocol_name
 139 )
 140 from .downloader.rtmp import rtmpdump_version
 141 from .postprocessor import (
 142     get_postprocessor,
 143     EmbedThumbnailPP,
 144     FFmpegFixupDurationPP,
 145     FFmpegFixupM3u8PP,
 146     FFmpegFixupM4aPP,
 147     FFmpegFixupStretchedPP,
 148     FFmpegFixupTimestampPP,
 149     FFmpegMergerPP,
 150     FFmpegPostProcessor,
 151     MoveFilesAfterDownloadPP,
 152     _PLUGIN_CLASSES as plugin_postprocessors
 153 )
 154 from .update import detect_variant
 155 from .version import __version__
 156
 157 if compat_os_name == 'nt':
 158     import ctypes
 159
 160
 161 class YoutubeDL(object):
 162     """YoutubeDL class.
 163
 164     YoutubeDL objects are the ones responsible for downloading the
 165     actual video file and writing it to disk if the user has requested
 166     it, among some other tasks. In most cases there should be one per
 167     program. As, given a video URL, the downloader doesn't know how to
 168     extract all the needed information, task that InfoExtractors do, it
 169     has to pass the URL to one of them.
 170
 171     For this, YoutubeDL objects have a method that allows
 172     InfoExtractors to be registered in a given order. When it is passed
 173     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 174     finds that reports being able to handle it. The InfoExtractor extracts
 175     all the information about the video or videos the URL refers to, and
 176     YoutubeDL processes the extracted information, possibly using a File
 177     Downloader to download the video.
 178
 179     YoutubeDL objects accept a lot of parameters. In order not to saturate
 180     the object constructor with arguments, it receives a dictionary of
 181     options instead. These options are available through the params
 182     attribute for the InfoExtractors to use. The YoutubeDL also
 183     registers itself as the downloader in charge for the InfoExtractors
 184     that are added to it, so this is a "mutual registration".
 185
 186     Available options:
 187
 188     username:          Username for authentication purposes.
 189     password:          Password for authentication purposes.
 190     videopassword:     Password for accessing a video.
 191     ap_mso:            Adobe Pass multiple-system operator identifier.
 192     ap_username:       Multiple-system operator account username.
 193     ap_password:       Multiple-system operator account password.
 194     usenetrc:          Use netrc for authentication instead.
 195     verbose:           Print additional info to stdout.
 196     quiet:             Do not print messages to stdout.
 197     no_warnings:       Do not print out anything for warnings.
 198     forceprint:        A list of templates to force print
 199     forceurl:          Force printing final URL. (Deprecated)
 200     forcetitle:        Force printing title. (Deprecated)
 201     forceid:           Force printing ID. (Deprecated)
 202     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 203     forcedescription:  Force printing description. (Deprecated)
 204     forcefilename:     Force printing final filename. (Deprecated)
 205     forceduration:     Force printing duration. (Deprecated)
 206     forcejson:         Force printing info_dict as JSON.
 207     dump_single_json:  Force printing the info_dict of the whole playlist
 208                        (or video) as a single JSON line.
 209     force_write_download_archive: Force writing download archive regardless
 210                        of 'skip_download' or 'simulate'.
 211     simulate:          Do not download the video files. If unset (or None),
 212                        simulate only if listsubtitles, listformats or list_thumbnails is used
 213     format:            Video format code. See "FORMAT SELECTION" for more details.
 214                        You can also pass a function. The function takes 'ctx' as
 215                        argument and returns the formats to download.
 216                        See "build_format_selector" for an implementation
 217     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 218     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 219                        extracting metadata even if the video is not actually
 220                        available for download (experimental)
 221     format_sort:       A list of fields by which to sort the video formats.
 222                        See "Sorting Formats" for more details.
 223     format_sort_force: Force the given format_sort. See "Sorting Formats"
 224                        for more details.
 225     allow_multiple_video_streams:   Allow multiple video streams to be merged
 226                        into a single file
 227     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 228                        into a single file
 229     check_formats:     Whether to test if the formats are downloadable.
 230                        Can be True (check all), False (check none),
 231                        'selected' (check selected formats),
 232                        or None (check only if requested by extractor)
 233     paths:             Dictionary of output paths. The allowed keys are 'home',
 234                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 235     outtmpl:           Dictionary of templates for output names. Allowed keys
 236                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 237                        For compatibility with youtube-dl, a single string can also be used
 238     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 239     restrictfilenames: Do not allow "&" and spaces in file names
 240     trim_file_name:    Limit length of filename (extension excluded)
 241     windowsfilenames:  Force the filenames to be windows compatible
 242     ignoreerrors:      Do not stop on download/postprocessing errors.
 243                        Can be 'only_download' to ignore only download errors.
 244                        Default is 'only_download' for CLI, but False for API
 245     skip_playlist_after_errors: Number of allowed failures until the rest of
 246                        the playlist is skipped
 247     force_generic_extractor: Force downloader to use the generic extractor
 248     overwrites:        Overwrite all video and metadata files if True,
 249                        overwrite only non-video files if None
 250                        and don't overwrite any file if False
 251                        For compatibility with youtube-dl,
 252                        "nooverwrites" may also be used instead
 253     playliststart:     Playlist item to start at.
 254     playlistend:       Playlist item to end at.
 255     playlist_items:    Specific indices of playlist to download.
 256     playlistreverse:   Download playlist items in reverse order.
 257     playlistrandom:    Download playlist items in random order.
 258     matchtitle:        Download only matching titles.
 259     rejecttitle:       Reject downloads for matching titles.
 260     logger:            Log messages to a logging.Logger instance.
 261     logtostderr:       Log messages to stderr instead of stdout.
 262     consoletitle:      Display progress in console window's titlebar.
 263     writedescription:  Write the video description to a .description file
 264     writeinfojson:     Write the video metadata to a .info.json file
 265     clean_infojson:    Remove private fields from the infojson
 266     getcomments:       Extract video comments. This will not be written to disk
 267                        unless writeinfojson is also given
 268     writeannotations:  Write the video annotations to a .annotations.xml file
 269     writethumbnail:    Write the thumbnail image to a file
 270     allow_playlist_files: Whether to write playlists' description, infojson etc
 271                        also to disk when using the 'write*' options
 272     write_all_thumbnails:  Write all thumbnail formats to files
 273     writelink:         Write an internet shortcut file, depending on the
 274                        current platform (.url/.webloc/.desktop)
 275     writeurllink:      Write a Windows internet shortcut file (.url)
 276     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 277     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 278     writesubtitles:    Write the video subtitles to a file
 279     writeautomaticsub: Write the automatically generated subtitles to a file
 280     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 281                        Downloads all the subtitles of the video
 282                        (requires writesubtitles or writeautomaticsub)
 283     listsubtitles:     Lists all available subtitles for the video
 284     subtitlesformat:   The format code for subtitles
 285     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 286                        The list may contain "all" to refer to all the available
 287                        subtitles. The language can be prefixed with a "-" to
 288                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 289     keepvideo:         Keep the video file after post-processing
 290     daterange:         A DateRange object, download only if the upload_date is in the range.
 291     skip_download:     Skip the actual download of the video file
 292     cachedir:          Location of the cache files in the filesystem.
 293                        False to disable filesystem cache.
 294     noplaylist:        Download single video instead of a playlist if in doubt.
 295     age_limit:         An integer representing the user's age in years.
 296                        Unsuitable videos for the given age are skipped.
 297     min_views:         An integer representing the minimum view count the video
 298                        must have in order to not be skipped.
 299                        Videos without view count information are always
 300                        downloaded. None for no limit.
 301     max_views:         An integer representing the maximum view count.
 302                        Videos that are more popular than that are not
 303                        downloaded.
 304                        Videos without view count information are always
 305                        downloaded. None for no limit.
 306     download_archive:  File name of a file where all downloads are recorded.
 307                        Videos already present in the file are not downloaded
 308                        again.
 309     break_on_existing: Stop the download process after attempting to download a
 310                        file that is in the archive.
 311     break_on_reject:   Stop the download process when encountering a video that
 312                        has been filtered out.
 313     cookiefile:        File name where cookies should be read from and dumped to
 314     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 315                        name/path from where cookies are loaded.
 316                        Eg: ('chrome', ) or ('vivaldi', 'default')
 317     nocheckcertificate:Do not verify SSL certificates
 318     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 319                        At the moment, this is only supported by YouTube.
 320     proxy:             URL of the proxy server to use
 321     geo_verification_proxy:  URL of the proxy to use for IP address verification
 322                        on geo-restricted sites.
 323     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 324     bidi_workaround:   Work around buggy terminals without bidirectional text
 325                        support, using fribidi
 326     debug_printtraffic:Print out sent and received HTTP traffic
 327     include_ads:       Download ads as well
 328     default_search:    Prepend this string if an input url is not valid.
 329                        'auto' for elaborate guessing
 330     encoding:          Use this encoding instead of the system-specified.
 331     extract_flat:      Do not resolve URLs, return the immediate result.
 332                        Pass in 'in_playlist' to only show this behavior for
 333                        playlist items.
 334     postprocessors:    A list of dictionaries, each with an entry
 335                        * key:  The name of the postprocessor. See
 336                                yt_dlp/postprocessor/__init__.py for a list.
 337                        * when: When to run the postprocessor. Can be one of
 338                                pre_process|before_dl|post_process|after_move.
 339                                Assumed to be 'post_process' if not given
 340     post_hooks:        Deprecated - Register a custom postprocessor instead
 341                        A list of functions that get called as the final step
 342                        for each video file, after all postprocessors have been
 343                        called. The filename will be passed as the only argument.
 344     progress_hooks:    A list of functions that get called on download
 345                        progress, with a dictionary with the entries
 346                        * status: One of "downloading", "error", or "finished".
 347                                  Check this first and ignore unknown values.
 348                        * info_dict: The extracted info_dict
 349
 350                        If status is one of "downloading", or "finished", the
 351                        following properties may also be present:
 352                        * filename: The final filename (always present)
 353                        * tmpfilename: The filename we're currently writing to
 354                        * downloaded_bytes: Bytes on disk
 355                        * total_bytes: Size of the whole file, None if unknown
 356                        * total_bytes_estimate: Guess of the eventual file size,
 357                                                None if unavailable.
 358                        * elapsed: The number of seconds since download started.
 359                        * eta: The estimated time in seconds, None if unknown
 360                        * speed: The download speed in bytes/second, None if
 361                                 unknown
 362                        * fragment_index: The counter of the currently
 363                                          downloaded video fragment.
 364                        * fragment_count: The number of fragments (= individual
 365                                          files that will be merged)
 366
 367                        Progress hooks are guaranteed to be called at least once
 368                        (with status "finished") if the download is successful.
 369     postprocessor_hooks:  A list of functions that get called on postprocessing
 370                        progress, with a dictionary with the entries
 371                        * status: One of "started", "processing", or "finished".
 372                                  Check this first and ignore unknown values.
 373                        * postprocessor: Name of the postprocessor
 374                        * info_dict: The extracted info_dict
 375
 376                        Postprocessor hooks are guaranteed to be called at least twice
 377                        (with status "started" and "finished") if the processing is successful.
 378     merge_output_format: Extension to use when merging formats.
 379     final_ext:         Expected final extension; used to detect when the file was
 380                        already downloaded and converted
 381     fixup:             Automatically correct known faults of the file.
 382                        One of:
 383                        - "never": do nothing
 384                        - "warn": only emit a warning
 385                        - "detect_or_warn": check whether we can do anything
 386                                            about it, warn otherwise (default)
 387     source_address:    Client-side IP address to bind to.
 388     call_home:         Boolean, true iff we are allowed to contact the
 389                        yt-dlp servers for debugging. (BROKEN)
 390     sleep_interval_requests: Number of seconds to sleep between requests
 391                        during extraction
 392     sleep_interval:    Number of seconds to sleep before each download when
 393                        used alone or a lower bound of a range for randomized
 394                        sleep before each download (minimum possible number
 395                        of seconds to sleep) when used along with
 396                        max_sleep_interval.
 397     max_sleep_interval:Upper bound of a range for randomized sleep before each
 398                        download (maximum possible number of seconds to sleep).
 399                        Must only be used along with sleep_interval.
 400                        Actual sleep time will be a random float from range
 401                        [sleep_interval; max_sleep_interval].
 402     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 403     listformats:       Print an overview of available video formats and exit.
 404     list_thumbnails:   Print a table of all thumbnails and exit.
 405     match_filter:      A function that gets called with the info_dict of
 406                        every video.
 407                        If it returns a message, the video is ignored.
 408                        If it returns None, the video is downloaded.
 409                        match_filter_func in utils.py is one example for this.
 410     no_color:          Do not emit color codes in output.
 411     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 412                        HTTP header
 413     geo_bypass_country:
 414                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 415                        explicit geographic restriction bypassing via faking
 416                        X-Forwarded-For HTTP header
 417     geo_bypass_ip_block:
 418                        IP range in CIDR notation that will be used similarly to
 419                        geo_bypass_country
 420
 421     The following options determine which downloader is picked:
 422     external_downloader: A dictionary of protocol keys and the executable of the
 423                        external downloader to use for it. The allowed protocols
 424                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 425                        Set the value to 'native' to use the native downloader
 426     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 427                        or {'m3u8': 'ffmpeg'} instead.
 428                        Use the native HLS downloader instead of ffmpeg/avconv
 429                        if True, otherwise use ffmpeg/avconv if False, otherwise
 430                        use downloader suggested by extractor if None.
 431     compat_opts:       Compatibility options. See "Differences in default behavior".
 432                        The following options do not work when used through the API:
 433                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 434                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 435                        Refer to __init__.py for their implementation
 436     progress_template: Dictionary of templates for progress outputs.
 437                        Allowed keys are 'download', 'postprocess',
 438                        'download-title' (console title) and 'postprocess-title'.
 439                        The template is mapped on a dictionary with keys 'progress' and 'info'
 440
 441     The following parameters are not used by YoutubeDL itself, they are used by
 442     the downloader (see yt_dlp/downloader/common.py):
 443     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 444     max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
 445     noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 446     external_downloader_args, concurrent_fragment_downloads.
 447
 448     The following options are used by the post processors:
 449     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 450                        otherwise prefer ffmpeg. (avconv support is deprecated)
 451     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 452                        to the binary or its containing directory.
 453     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 454                        and a list of additional command-line arguments for the
 455                        postprocessor/executable. The dict can also have "PP+EXE" keys
 456                        which are used when the given exe is used by the given PP.
 457                        Use 'default' as the name for arguments to be passed to all PP
 458                        For compatibility with youtube-dl, a single list of args
 459                        can also be used
 460
 461     The following options are used by the extractors:
 462     extractor_retries: Number of times to retry for known errors
 463     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 464     hls_split_discontinuity: Split HLS playlists to different formats at
 465                        discontinuities such as ad breaks (default: False)
 466     extractor_args:    A dictionary of arguments to be passed to the extractors.
 467                        See "EXTRACTOR ARGUMENTS" for details.
 468                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 469     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 470                        If True (default), DASH manifests and related
 471                        data will be downloaded and processed by extractor.
 472                        You can reduce network I/O by disabling it if you don't
 473                        care about DASH. (only for youtube)
 474     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 475                        If True (default), HLS manifests and related
 476                        data will be downloaded and processed by extractor.
 477                        You can reduce network I/O by disabling it if you don't
 478                        care about HLS. (only for youtube)
 479     """
 480
 481     _NUMERIC_FIELDS = set((
 482         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 483         'timestamp', 'release_timestamp',
 484         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 485         'average_rating', 'comment_count', 'age_limit',
 486         'start_time', 'end_time',
 487         'chapter_number', 'season_number', 'episode_number',
 488         'track_number', 'disc_number', 'release_year',
 489     ))
 490
 491     _format_selection_exts = {
 492         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 493         'video': {'mp4', 'flv', 'webm', '3gp'},
 494         'storyboards': {'mhtml'},
 495     }
 496
 497     params = None
 498     _ies = {}
 499     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 500     _printed_messages = set()
 501     _first_webpage_request = True
 502     _download_retcode = None
 503     _num_downloads = None
 504     _playlist_level = 0
 505     _playlist_urls = set()
 506     _screen_file = None
 507
 508     def __init__(self, params=None, auto_init=True):
 509         """Create a YoutubeDL object with the given options.
 510         @param auto_init    Whether to load the default extractors and print header (if verbose).
 511                             Set to 'no_verbose_header' to not print the header
 512         """
 513         if params is None:
 514             params = {}
 515         self._ies = {}
 516         self._ies_instances = {}
 517         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 518         self._printed_messages = set()
 519         self._first_webpage_request = True
 520         self._post_hooks = []
 521         self._progress_hooks = []
 522         self._postprocessor_hooks = []
 523         self._download_retcode = 0
 524         self._num_downloads = 0
 525         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 526         self._err_file = sys.stderr
 527         self.params = params
 528         self.cache = Cache(self)
 529
 530         windows_enable_vt_mode()
 531         self._allow_colors = {
 532             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 533             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 534         }
 535
 536         if sys.version_info < (3, 6):
 537             self.report_warning(
 538                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 539
 540         if self.params.get('allow_unplayable_formats'):
 541             self.report_warning(
 542                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 543                 'This is a developer option intended for debugging. \n'
 544                 '         If you experience any issues while using this option, '
 545                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 546
 547         def check_deprecated(param, option, suggestion):
 548             if self.params.get(param) is not None:
 549                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 550                 return True
 551             return False
 552
 553         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 554             if self.params.get('geo_verification_proxy') is None:
 555                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 556
 557         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 558         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 559         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 560
 561         for msg in self.params.get('_warnings', []):
 562             self.report_warning(msg)
 563
 564         if 'list-formats' in self.params.get('compat_opts', []):
 565             self.params['listformats_table'] = False
 566
 567         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 568             # nooverwrites was unnecessarily changed to overwrites
 569             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 570             # This ensures compatibility with both keys
 571             self.params['overwrites'] = not self.params['nooverwrites']
 572         elif self.params.get('overwrites') is None:
 573             self.params.pop('overwrites', None)
 574         else:
 575             self.params['nooverwrites'] = not self.params['overwrites']
 576
 577         if params.get('bidi_workaround', False):
 578             try:
 579                 import pty
 580                 master, slave = pty.openpty()
 581                 width = compat_get_terminal_size().columns
 582                 if width is None:
 583                     width_args = []
 584                 else:
 585                     width_args = ['-w', str(width)]
 586                 sp_kwargs = dict(
 587                     stdin=subprocess.PIPE,
 588                     stdout=slave,
 589                     stderr=self._err_file)
 590                 try:
 591                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 592                 except OSError:
 593                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 594                 self._output_channel = os.fdopen(master, 'rb')
 595             except OSError as ose:
 596                 if ose.errno == errno.ENOENT:
 597                     self.report_warning(
 598                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 599                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 600                 else:
 601                     raise
 602
 603         if (sys.platform != 'win32'
 604                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 605                 and not params.get('restrictfilenames', False)):
 606             # Unicode filesystem API will throw errors (#1474, #13027)
 607             self.report_warning(
 608                 'Assuming --restrict-filenames since file system encoding '
 609                 'cannot encode all characters. '
 610                 'Set the LC_ALL environment variable to fix this.')
 611             self.params['restrictfilenames'] = True
 612
 613         self.outtmpl_dict = self.parse_outtmpl()
 614
 615         # Creating format selector here allows us to catch syntax errors before the extraction
 616         self.format_selector = (
 617             None if self.params.get('format') is None
 618             else self.params['format'] if callable(self.params['format'])
 619             else self.build_format_selector(self.params['format']))
 620
 621         self._setup_opener()
 622
 623         if auto_init:
 624             if auto_init != 'no_verbose_header':
 625                 self.print_debug_header()
 626             self.add_default_info_extractors()
 627
 628         for pp_def_raw in self.params.get('postprocessors', []):
 629             pp_def = dict(pp_def_raw)
 630             when = pp_def.pop('when', 'post_process')
 631             pp_class = get_postprocessor(pp_def.pop('key'))
 632             pp = pp_class(self, **compat_kwargs(pp_def))
 633             self.add_post_processor(pp, when=when)
 634
 635         hooks = {
 636             'post_hooks': self.add_post_hook,
 637             'progress_hooks': self.add_progress_hook,
 638             'postprocessor_hooks': self.add_postprocessor_hook,
 639         }
 640         for opt, fn in hooks.items():
 641             for ph in self.params.get(opt, []):
 642                 fn(ph)
 643
 644         register_socks_protocols()
 645
 646         def preload_download_archive(fn):
 647             """Preload the archive, if any is specified"""
 648             if fn is None:
 649                 return False
 650             self.write_debug(f'Loading archive file {fn!r}')
 651             try:
 652                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 653                     for line in archive_file:
 654                         self.archive.add(line.strip())
 655             except IOError as ioe:
 656                 if ioe.errno != errno.ENOENT:
 657                     raise
 658                 return False
 659             return True
 660
 661         self.archive = set()
 662         preload_download_archive(self.params.get('download_archive'))
 663
 664     def warn_if_short_id(self, argv):
 665         # short YouTube ID starting with dash?
 666         idxs = [
 667             i for i, a in enumerate(argv)
 668             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 669         if idxs:
 670             correct_argv = (
 671                 ['yt-dlp']
 672                 + [a for i, a in enumerate(argv) if i not in idxs]
 673                 + ['--'] + [argv[i] for i in idxs]
 674             )
 675             self.report_warning(
 676                 'Long argument string detected. '
 677                 'Use -- to separate parameters and URLs, like this:\n%s' %
 678                 args_to_str(correct_argv))
 679
 680     def add_info_extractor(self, ie):
 681         """Add an InfoExtractor object to the end of the list."""
 682         ie_key = ie.ie_key()
 683         self._ies[ie_key] = ie
 684         if not isinstance(ie, type):
 685             self._ies_instances[ie_key] = ie
 686             ie.set_downloader(self)
 687
 688     def _get_info_extractor_class(self, ie_key):
 689         ie = self._ies.get(ie_key)
 690         if ie is None:
 691             ie = get_info_extractor(ie_key)
 692             self.add_info_extractor(ie)
 693         return ie
 694
 695     def get_info_extractor(self, ie_key):
 696         """
 697         Get an instance of an IE with name ie_key, it will try to get one from
 698         the _ies list, if there's no instance it will create a new one and add
 699         it to the extractor list.
 700         """
 701         ie = self._ies_instances.get(ie_key)
 702         if ie is None:
 703             ie = get_info_extractor(ie_key)()
 704             self.add_info_extractor(ie)
 705         return ie
 706
 707     def add_default_info_extractors(self):
 708         """
 709         Add the InfoExtractors returned by gen_extractors to the end of the list
 710         """
 711         for ie in gen_extractor_classes():
 712             self.add_info_extractor(ie)
 713
 714     def add_post_processor(self, pp, when='post_process'):
 715         """Add a PostProcessor object to the end of the chain."""
 716         self._pps[when].append(pp)
 717         pp.set_downloader(self)
 718
 719     def add_post_hook(self, ph):
 720         """Add the post hook"""
 721         self._post_hooks.append(ph)
 722
 723     def add_progress_hook(self, ph):
 724         """Add the download progress hook"""
 725         self._progress_hooks.append(ph)
 726
 727     def add_postprocessor_hook(self, ph):
 728         """Add the postprocessing progress hook"""
 729         self._postprocessor_hooks.append(ph)
 730
 731     def _bidi_workaround(self, message):
 732         if not hasattr(self, '_output_channel'):
 733             return message
 734
 735         assert hasattr(self, '_output_process')
 736         assert isinstance(message, compat_str)
 737         line_count = message.count('\n') + 1
 738         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 739         self._output_process.stdin.flush()
 740         res = ''.join(self._output_channel.readline().decode('utf-8')
 741                       for _ in range(line_count))
 742         return res[:-len('\n')]
 743
 744     def _write_string(self, message, out=None, only_once=False):
 745         if only_once:
 746             if message in self._printed_messages:
 747                 return
 748             self._printed_messages.add(message)
 749         write_string(message, out=out, encoding=self.params.get('encoding'))
 750
 751     def to_stdout(self, message, skip_eol=False, quiet=False):
 752         """Print message to stdout"""
 753         if self.params.get('logger'):
 754             self.params['logger'].debug(message)
 755         elif not quiet or self.params.get('verbose'):
 756             self._write_string(
 757                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 758                 self._err_file if quiet else self._screen_file)
 759
 760     def to_stderr(self, message, only_once=False):
 761         """Print message to stderr"""
 762         assert isinstance(message, compat_str)
 763         if self.params.get('logger'):
 764             self.params['logger'].error(message)
 765         else:
 766             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 767
 768     def to_console_title(self, message):
 769         if not self.params.get('consoletitle', False):
 770             return
 771         if compat_os_name == 'nt':
 772             if ctypes.windll.kernel32.GetConsoleWindow():
 773                 # c_wchar_p() might not be necessary if `message` is
 774                 # already of type unicode()
 775                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 776         elif 'TERM' in os.environ:
 777             self._write_string('\033]0;%s\007' % message, self._screen_file)
 778
 779     def save_console_title(self):
 780         if not self.params.get('consoletitle', False):
 781             return
 782         if self.params.get('simulate'):
 783             return
 784         if compat_os_name != 'nt' and 'TERM' in os.environ:
 785             # Save the title on stack
 786             self._write_string('\033[22;0t', self._screen_file)
 787
 788     def restore_console_title(self):
 789         if not self.params.get('consoletitle', False):
 790             return
 791         if self.params.get('simulate'):
 792             return
 793         if compat_os_name != 'nt' and 'TERM' in os.environ:
 794             # Restore the title from stack
 795             self._write_string('\033[23;0t', self._screen_file)
 796
 797     def __enter__(self):
 798         self.save_console_title()
 799         return self
 800
 801     def __exit__(self, *args):
 802         self.restore_console_title()
 803
 804         if self.params.get('cookiefile') is not None:
 805             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 806
 807     def trouble(self, message=None, tb=None):
 808         """Determine action to take when a download problem appears.
 809
 810         Depending on if the downloader has been configured to ignore
 811         download errors or not, this method may throw an exception or
 812         not when errors are found, after printing the message.
 813
 814         tb, if given, is additional traceback information.
 815         """
 816         if message is not None:
 817             self.to_stderr(message)
 818         if self.params.get('verbose'):
 819             if tb is None:
 820                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 821                     tb = ''
 822                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 823                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 824                     tb += encode_compat_str(traceback.format_exc())
 825                 else:
 826                     tb_data = traceback.format_list(traceback.extract_stack())
 827                     tb = ''.join(tb_data)
 828             if tb:
 829                 self.to_stderr(tb)
 830         if not self.params.get('ignoreerrors'):
 831             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 832                 exc_info = sys.exc_info()[1].exc_info
 833             else:
 834                 exc_info = sys.exc_info()
 835             raise DownloadError(message, exc_info)
 836         self._download_retcode = 1
 837
 838     def to_screen(self, message, skip_eol=False):
 839         """Print message to stdout if not in quiet mode"""
 840         self.to_stdout(
 841             message, skip_eol, quiet=self.params.get('quiet', False))
 842
    class Styles(Enum):
        # Named text styles. _format_text() unwraps a member to its value
        # before handing it to format_text().
        HEADERS = 'yellow'
        EMPHASIS = 'blue'
        ID = 'green'
        # Same value as EMPHASIS, so Enum makes DELIM an alias of it
        DELIM = 'blue'
        ERROR = 'red'
        # Same value as HEADERS, so Enum makes WARNING an alias of it
        WARNING = 'yellow'
        SUPPRESS = 'light black'
 851
 852     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
 853         if test_encoding:
 854             original_text = text
 855             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 856             text = text.encode(encoding, 'ignore').decode(encoding)
 857             if fallback is not None and text != original_text:
 858                 text = fallback
 859         if isinstance(f, self.Styles):
 860             f = f._value_
 861         return format_text(text, f) if allow_colors else text if fallback is None else fallback
 862
 863     def _format_screen(self, *args, **kwargs):
 864         return self._format_text(
 865             self._screen_file, self._allow_colors['screen'], *args, **kwargs)
 866
 867     def _format_err(self, *args, **kwargs):
 868         return self._format_text(
 869             self._err_file, self._allow_colors['err'], *args, **kwargs)
 870
 871     def report_warning(self, message, only_once=False):
 872         '''
 873         Print the message to stderr, it will be prefixed with 'WARNING:'
 874         If stderr is a tty file the 'WARNING:' will be colored
 875         '''
 876         if self.params.get('logger') is not None:
 877             self.params['logger'].warning(message)
 878         else:
 879             if self.params.get('no_warnings'):
 880                 return
 881             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 882
 883     def report_error(self, message, tb=None):
 884         '''
 885         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 886         in red if stderr is a tty file.
 887         '''
 888         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb)
 889
 890     def write_debug(self, message, only_once=False):
 891         '''Log debug message or Print message to stderr'''
 892         if not self.params.get('verbose', False):
 893             return
 894         message = '[debug] %s' % message
 895         if self.params.get('logger'):
 896             self.params['logger'].debug(message)
 897         else:
 898             self.to_stderr(message, only_once)
 899
 900     def report_file_already_downloaded(self, file_name):
 901         """Report file has already been fully downloaded."""
 902         try:
 903             self.to_screen('[download] %s has already been downloaded' % file_name)
 904         except UnicodeEncodeError:
 905             self.to_screen('[download] The file has already been downloaded')
 906
 907     def report_file_delete(self, file_name):
 908         """Report that existing file will be deleted."""
 909         try:
 910             self.to_screen('Deleting existing file %s' % file_name)
 911         except UnicodeEncodeError:
 912             self.to_screen('Deleting existing file')
 913
 914     def raise_no_formats(self, info, forced=False):
 915         has_drm = info.get('__has_drm')
 916         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 917         expected = self.params.get('ignore_no_formats_error')
 918         if forced or not expected:
 919             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 920                                  expected=has_drm or expected)
 921         else:
 922             self.report_warning(msg)
 923
 924     def parse_outtmpl(self):
 925         outtmpl_dict = self.params.get('outtmpl', {})
 926         if not isinstance(outtmpl_dict, dict):
 927             outtmpl_dict = {'default': outtmpl_dict}
 928         # Remove spaces in the default template
 929         if self.params.get('restrictfilenames'):
 930             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 931         else:
 932             sanitize = lambda x: x
 933         outtmpl_dict.update({
 934             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 935             if outtmpl_dict.get(k) is None})
 936         for key, val in outtmpl_dict.items():
 937             if isinstance(val, bytes):
 938                 self.report_warning(
 939                     'Parameter outtmpl is bytes, but should be a unicode string. '
 940                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 941         return outtmpl_dict
 942
 943     def get_output_path(self, dir_type='', filename=None):
 944         paths = self.params.get('paths', {})
 945         assert isinstance(paths, dict)
 946         path = os.path.join(
 947             expand_path(paths.get('home', '').strip()),
 948             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 949             filename or '')
 950
 951         # Temporary fix for #4787
 952         # 'Treat' all problem characters by passing filename through preferredencoding
 953         # to workaround encoding issues with subprocess on python2 @ Windows
 954         if sys.version_info < (3, 0) and sys.platform == 'win32':
 955             path = encodeFilename(path, True).decode(preferredencoding())
 956         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 957
 958     @staticmethod
 959     def _outtmpl_expandpath(outtmpl):
 960         # expand_path translates '%%' into '%' and '$$' into '$'
 961         # correspondingly that is not what we want since we need to keep
 962         # '%%' intact for template dict substitution step. Working around
 963         # with boundary-alike separator hack.
 964         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 965         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 966
 967         # outtmpl should be expand_path'ed before template dict substitution
 968         # because meta fields may contain env variables we don't want to
 969         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 970         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 971         return expand_path(outtmpl).replace(sep, '')
 972
 973     @staticmethod
 974     def escape_outtmpl(outtmpl):
 975         ''' Escape any remaining strings like %s, %abc% etc. '''
 976         return re.sub(
 977             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 978             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 979             outtmpl)
 980
 981     @classmethod
 982     def validate_outtmpl(cls, outtmpl):
 983         ''' @return None or Exception object '''
 984         outtmpl = re.sub(
 985             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
 986             lambda mobj: f'{mobj.group(0)[:-1]}s',
 987             cls._outtmpl_expandpath(outtmpl))
 988         try:
 989             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
 990             return None
 991         except ValueError as err:
 992             return err
 993
 994     @staticmethod
 995     def _copy_infodict(info_dict):
 996         info_dict = dict(info_dict)
 997         for key in ('__original_infodict', '__postprocessors'):
 998             info_dict.pop(key, None)
 999         return info_dict
1000
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

        Every field of the template matched by EXTERNAL_FORMAT_RE is resolved
        (traversal, negation, maths, strftime formatting, alternates, default)
        and rewritten to reference an entry of a newly built dict.

        @param outtmpl    Output template string
        @param info_dict  Info dict. 'epoch' is set on it if missing; all other
                          additions are made on a shallow copy
        @param sanitize   Optional callable(field_name, value) applied to the values
                          of fields whose final conversion is 'c', 's' or 'r'
        @return           Tuple of (rewritten template, dict with the values to substitute)
        """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        # From here on only the copy is modified
        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Values to substitute, keyed by the rewritten field keys (filled by create_key)
        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the number pattern below is unescaped and so matches
        # any character, not only a decimal point - confirm whether '\.' was intended
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Grammar of the text between the parentheses of a field:
        # [-]fields[maths][>strf_format][,alternate][|default]
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|)]+)?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            """Look up the dotted path `k` in info_dict; an empty first component is dropped"""
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            """Compute the value for one parsed field (the groupdict of an INTERNAL_FORMAT_RE match)"""
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                # Local name (shadows the `operator` module inside this function):
                # the pending operation, or None while an operator is expected next
                operator = None
                while offset_key:
                    # Consume alternately an operator and an operand from the front
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is either a number or another field of info_dict
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        # e.g. the value or the operand is None
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder used when a field has no value and no default of its own
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            """json.dumps `default` hook: sets and LazyLists are serialized as lists"""
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            """Substitution callback for EXTERNAL_FORMAT_RE: store the value in TMPL_DICT and return the rewritten field"""
            # Matches without a key are left untouched
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            # Last path component of the first field; this is what sanitize() receives as field name
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, default = None, na
            # Walk the chain of alternates until one yields a value;
            # a default given by a later alternate replaces an earlier one
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            flags = outer_mobj.group('conversion') or ''
            # Same format specification, but with 's' as conversion type
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                # The format is applied to the UTF-8 encoded value, so widths/precisions count bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                # Only the first character of a truthy value is used
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            # The stored key combines the original key and format, so the same field used
            # with different formats gets separate entries. A NUL is inserted after each '%'.
            # NOTE(review): presumably so that escape_outtmpl (which skips '%' followed by NUL)
            # leaves the key alone - confirm
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1154
1155     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1156         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1157         return self.escape_outtmpl(outtmpl) % info_dict
1158
1159     def _prepare_filename(self, info_dict, tmpl_type='default'):
1160         try:
1161             sanitize = lambda k, v: sanitize_filename(
1162                 compat_str(v),
1163                 restricted=self.params.get('restrictfilenames'),
1164                 is_id=(k == 'id' or k.endswith('_id')))
1165             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1166             filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
1167
1168             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1169             if filename and force_ext is not None:
1170                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1171
1172             # https://github.com/blackjack4494/youtube-dlc/issues/85
1173             trim_file_name = self.params.get('trim_file_name', False)
1174             if trim_file_name:
1175                 fn_groups = filename.rsplit('.')
1176                 ext = fn_groups[-1]
1177                 sub_ext = ''
1178                 if len(fn_groups) > 2:
1179                     sub_ext = fn_groups[-2]
1180                 filename = join_nonempty(fn_groups[0][:trim_file_name], sub_ext, ext, delim='.')
1181
1182             return filename
1183         except ValueError as err:
1184             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1185             return None
1186
1187     def prepare_filename(self, info_dict, dir_type='', warn=False):
1188         """Generate the output filename."""
1189
1190         filename = self._prepare_filename(info_dict, dir_type or 'default')
1191         if not filename and dir_type not in ('', 'temp'):
1192             return ''
1193
1194         if warn:
1195             if not self.params.get('paths'):
1196                 pass
1197             elif filename == '-':
1198                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1199             elif os.path.isabs(filename):
1200                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1201         if filename == '-' or not filename:
1202             return filename
1203
1204         return self.get_output_path(dir_type, filename)
1205
1206     def _match_entry(self, info_dict, incomplete=False, silent=False):
1207         """ Returns None if the file should be downloaded """
1208
1209         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1210
1211         def check_filter():
1212             if 'title' in info_dict:
1213                 # This can happen when we're just evaluating the playlist
1214                 title = info_dict['title']
1215                 matchtitle = self.params.get('matchtitle', False)
1216                 if matchtitle:
1217                     if not re.search(matchtitle, title, re.IGNORECASE):
1218                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1219                 rejecttitle = self.params.get('rejecttitle', False)
1220                 if rejecttitle:
1221                     if re.search(rejecttitle, title, re.IGNORECASE):
1222                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1223             date = info_dict.get('upload_date')
1224             if date is not None:
1225                 dateRange = self.params.get('daterange', DateRange())
1226                 if date not in dateRange:
1227                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1228             view_count = info_dict.get('view_count')
1229             if view_count is not None:
1230                 min_views = self.params.get('min_views')
1231                 if min_views is not None and view_count < min_views:
1232                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1233                 max_views = self.params.get('max_views')
1234                 if max_views is not None and view_count > max_views:
1235                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1236             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1237                 return 'Skipping "%s" because it is age restricted' % video_title
1238
1239             match_filter = self.params.get('match_filter')
1240             if match_filter is not None:
1241                 try:
1242                     ret = match_filter(info_dict, incomplete=incomplete)
1243                 except TypeError:
1244                     # For backward compatibility
1245                     ret = None if incomplete else match_filter(info_dict)
1246                 if ret is not None:
1247                     return ret
1248             return None
1249
1250         if self.in_download_archive(info_dict):
1251             reason = '%s has already been recorded in the archive' % video_title
1252             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1253         else:
1254             reason = check_filter()
1255             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1256         if reason is not None:
1257             if not silent:
1258                 self.to_screen('[download] ' + reason)
1259             if self.params.get(break_opt, False):
1260                 raise break_err()
1261         return reason
1262
1263     @staticmethod
1264     def add_extra_info(info_dict, extra_info):
1265         '''Set the keys from extra_info in info dict if they are missing'''
1266         for key, value in extra_info.items():
1267             info_dict.setdefault(key, value)
1268
1269     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1270                      process=True, force_generic_extractor=False):
1271         """
1272         Return a list with a dictionary for each video extracted.
1273
1274         Arguments:
1275         url -- URL to extract
1276
1277         Keyword arguments:
1278         download -- whether to download videos during extraction
1279         ie_key -- extractor key hint
1280         extra_info -- dictionary containing the extra values to add to each result
1281         process -- whether to resolve all unresolved references (URLs, playlist items),
1282             must be True for download to work.
1283         force_generic_extractor -- force using the generic extractor
1284         """
1285
1286         if extra_info is None:
1287             extra_info = {}
1288
1289         if not ie_key and force_generic_extractor:
1290             ie_key = 'Generic'
1291
1292         if ie_key:
1293             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1294         else:
1295             ies = self._ies
1296
1297         for ie_key, ie in ies.items():
1298             if not ie.suitable(url):
1299                 continue
1300
1301             if not ie.working():
1302                 self.report_warning('The program functionality for this site has been marked as broken, '
1303                                     'and will probably not work.')
1304
1305             temp_id = ie.get_temp_id(url)
1306             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1307                 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1308                 if self.params.get('break_on_existing', False):
1309                     raise ExistingVideoReached()
1310                 break
1311             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1312         else:
1313             self.report_error('no suitable InfoExtractor for URL %s' % url)
1314
1315     def __handle_extraction_exceptions(func):
1316         @functools.wraps(func)
1317         def wrapper(self, *args, **kwargs):
1318             try:
1319                 return func(self, *args, **kwargs)
1320             except GeoRestrictedError as e:
1321                 msg = e.msg
1322                 if e.countries:
1323                     msg += '\nThis video is available in %s.' % ', '.join(
1324                         map(ISO3166Utils.short2full, e.countries))
1325                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1326                 self.report_error(msg)
1327             except ExtractorError as e:  # An error we somewhat expected
1328                 self.report_error(compat_str(e), e.format_traceback())
1329             except ThrottledDownload as e:
1330                 self.to_stderr('\r')
1331                 self.report_warning(f'{e}; Re-extracting data')
1332                 return wrapper(self, *args, **kwargs)
1333             except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1334                 raise
1335             except Exception as e:
1336                 if self.params.get('ignoreerrors'):
1337                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1338                 else:
1339                     raise
1340         return wrapper
1341
1342     @__handle_extraction_exceptions
1343     def __extract_info(self, url, ie, download, extra_info, process):
1344         ie_result = ie.extract(url)
1345         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1346             return
1347         if isinstance(ie_result, list):
1348             # Backwards compatibility: old IE result format
1349             ie_result = {
1350                 '_type': 'compat_list',
1351                 'entries': ie_result,
1352             }
1353         if extra_info.get('original_url'):
1354             ie_result.setdefault('original_url', extra_info['original_url'])
1355         self.add_default_extra_info(ie_result, ie, url)
1356         if process:
1357             return self.process_ie_result(ie_result, download, extra_info)
1358         else:
1359             return ie_result
1360
1361     def add_default_extra_info(self, ie_result, ie, url):
1362         if url is not None:
1363             self.add_extra_info(ie_result, {
1364                 'webpage_url': url,
1365                 'original_url': url,
1366                 'webpage_url_basename': url_basename(url),
1367             })
1368         if ie is not None:
1369             self.add_extra_info(ie_result, {
1370                 'extractor': ie.IE_NAME,
1371                 'extractor_key': ie.ie_key(),
1372             })
1373
1374     def process_ie_result(self, ie_result, download=True, extra_info=None):
1375         """
1376         Take the result of the ie(may be modified) and resolve all unresolved
1377         references (URLs, playlist items).
1378
1379         It will also download the videos if 'download'.
1380         Returns the resolved ie_result.
1381         """
1382         if extra_info is None:
1383             extra_info = {}
1384         result_type = ie_result.get('_type', 'video')
1385
1386         if result_type in ('url', 'url_transparent'):
1387             ie_result['url'] = sanitize_url(ie_result['url'])
1388             if ie_result.get('original_url'):
1389                 extra_info.setdefault('original_url', ie_result['original_url'])
1390
1391             extract_flat = self.params.get('extract_flat', False)
1392             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1393                     or extract_flat is True):
1394                 info_copy = ie_result.copy()
1395                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1396                 if ie and not ie_result.get('id'):
1397                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1398                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1399                 self.add_extra_info(info_copy, extra_info)
1400                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1401                 if self.params.get('force_write_download_archive', False):
1402                     self.record_download_archive(info_copy)
1403                 return ie_result
1404
1405         if result_type == 'video':
1406             self.add_extra_info(ie_result, extra_info)
1407             ie_result = self.process_video_result(ie_result, download=download)
1408             additional_urls = (ie_result or {}).get('additional_urls')
1409             if additional_urls:
1410                 # TODO: Improve MetadataParserPP to allow setting a list
1411                 if isinstance(additional_urls, compat_str):
1412                     additional_urls = [additional_urls]
1413                 self.to_screen(
1414                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1415                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1416                 ie_result['additional_entries'] = [
1417                     self.extract_info(
1418                         url, download, extra_info,
1419                         force_generic_extractor=self.params.get('force_generic_extractor'))
1420                     for url in additional_urls
1421                 ]
1422             return ie_result
1423         elif result_type == 'url':
1424             # We have to add extra_info to the results because it may be
1425             # contained in a playlist
1426             return self.extract_info(
1427                 ie_result['url'], download,
1428                 ie_key=ie_result.get('ie_key'),
1429                 extra_info=extra_info)
1430         elif result_type == 'url_transparent':
1431             # Use the information from the embedding page
1432             info = self.extract_info(
1433                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1434                 extra_info=extra_info, download=False, process=False)
1435
1436             # extract_info may return None when ignoreerrors is enabled and
1437             # extraction failed with an error, don't crash and return early
1438             # in this case
1439             if not info:
1440                 return info
1441
1442             force_properties = dict(
1443                 (k, v) for k, v in ie_result.items() if v is not None)
1444             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1445                 if f in force_properties:
1446                     del force_properties[f]
1447             new_result = info.copy()
1448             new_result.update(force_properties)
1449
1450             # Extracted info may not be a video result (i.e.
1451             # info.get('_type', 'video') != video) but rather an url or
1452             # url_transparent. In such cases outer metadata (from ie_result)
1453             # should be propagated to inner one (info). For this to happen
1454             # _type of info should be overridden with url_transparent. This
1455             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1456             if new_result.get('_type') == 'url':
1457                 new_result['_type'] = 'url_transparent'
1458
1459             return self.process_ie_result(
1460                 new_result, download=download, extra_info=extra_info)
1461         elif result_type in ('playlist', 'multi_video'):
1462             # Protect from infinite recursion due to recursively nested playlists
1463             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1464             webpage_url = ie_result['webpage_url']
1465             if webpage_url in self._playlist_urls:
1466                 self.to_screen(
1467                     '[download] Skipping already downloaded playlist: %s'
1468                     % ie_result.get('title') or ie_result.get('id'))
1469                 return
1470
1471             self._playlist_level += 1
1472             self._playlist_urls.add(webpage_url)
1473             self._sanitize_thumbnails(ie_result)
1474             try:
1475                 return self.__process_playlist(ie_result, download)
1476             finally:
1477                 self._playlist_level -= 1
1478                 if not self._playlist_level:
1479                     self._playlist_urls.clear()
1480         elif result_type == 'compat_list':
1481             self.report_warning(
1482                 'Extractor %s returned a compat_list result. '
1483                 'It needs to be updated.' % ie_result.get('extractor'))
1484
1485             def _fixup(r):
1486                 self.add_extra_info(r, {
1487                     'extractor': ie_result['extractor'],
1488                     'webpage_url': ie_result['webpage_url'],
1489                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1490                     'extractor_key': ie_result['extractor_key'],
1491                 })
1492                 return r
1493             ie_result['entries'] = [
1494                 self.process_ie_result(_fixup(r), download, extra_info)
1495                 for r in ie_result['entries']
1496             ]
1497             return ie_result
1498         else:
1499             raise Exception('Invalid result type: %s' % result_type)
1500
1501     def _ensure_dir_exists(self, path):
1502         return make_dir(path, self.report_error)
1503
    def __process_playlist(self, ie_result, download):
        """Select, filter and process the entries of a playlist result.

        Entries are chosen according to the 'playlist_items', 'playliststart'
        and 'playlistend' params, optionally reversed/shuffled, and each one
        is passed through process_ie_result with playlist metadata attached.

        Returns ie_result with 'entries' replaced by the processed results and
        'requested_entries' set. Returns None when writing the playlist info
        json or description yields None.
        Raises EntryNotInPlaylist if ie_result has no 'entries', or if a
        requested entry is missing from a previously filtered playlist.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        if 'entries' not in ie_result:
            raise EntryNotInPlaylist('There are no entries')

        # Unique placeholder for positions that have no entry
        MissingEntry = object()
        # A non-empty 'requested_entries' means 'entries' only holds the
        # items at those (1-based) playlist positions
        incomplete_entries = bool(ie_result.get('requested_entries'))
        if incomplete_entries:
            def fill_missing_entries(entries, indices):
                # Put every entry back at its 1-based index and pad the gaps
                ret = [MissingEntry] * max(indices)
                for i, entry in zip(indices, entries):
                    ret[i - 1] = entry
                return ret
            ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

        playlist_results = []

        playliststart = self.params.get('playliststart', 1)
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                # Comma separated list of single indices and inclusive "a-b" ranges
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']
        # For a list the total is filled in right away and the escaped '%%d'
        # is left as the single placeholder that is formatted with n_entries later
        msg = (
            'Downloading %d videos' if not isinstance(ie_entries, list)
            else 'Collected %d videos; downloading %%d of them' % len(ie_entries))

        if isinstance(ie_entries, list):
            def get_entry(i):
                return ie_entries[i - 1]
        else:
            if not isinstance(ie_entries, (PagedList, LazyList)):
                ie_entries = LazyList(ie_entries)

            def get_entry(i):
                # Indexing a lazy/paged list may run extractor code, so it is
                # wrapped in the same exception handling as extraction itself
                return YoutubeDL.__handle_extraction_exceptions(
                    lambda self, i: ie_entries[i - 1]
                )(self, i)

        entries = []
        # Either the explicitly requested indices or an open-ended count from playliststart
        items = playlistitems if playlistitems is not None else itertools.count(playliststart)
        for i in items:
            if i == 0:
                continue
            if playlistitems is None and playlistend is not None and playlistend < i:
                break
            entry = None
            try:
                entry = get_entry(i)
                if entry is MissingEntry:
                    raise EntryNotInPlaylist()
            except (IndexError, EntryNotInPlaylist):
                if incomplete_entries:
                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                elif not playlistitems:
                    # Ran past the end of the playlist
                    break
            # With explicit playlist items a missing entry is kept as None so
            # that positions in `entries` stay aligned with `playlistitems`
            entries.append(entry)
            try:
                if entry is not None:
                    # Only the break_on_existing/break_on_reject exceptions matter
                    # here; the filter result itself is evaluated again below
                    self._match_entry(entry, incomplete=True, silent=True)
            except (ExistingVideoReached, RejectedVideoReached):
                break
        ie_result['entries'] = entries

        # Save playlist_index before re-ordering
        entries = [
            ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
            for i, entry in enumerate(entries, 1)
            if entry is not None]
        n_entries = len(entries)

        if not playlistitems and (playliststart != 1 or playlistend):
            playlistitems = list(range(playliststart, playliststart + n_entries))
        ie_result['requested_entries'] = playlistitems

        _infojson_written = False
        if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
            # Playlist-level fields used for the output filename templates;
            # values from ie_result take precedence over these defaults
            ie_copy = {
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': 0,
                'n_entries': n_entries,
            }
            ie_copy.update(dict(ie_result))

            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            # A None result from the writers aborts the whole playlist
            if _infojson_written is None:
                return
            if self._write_description('playlist', ie_result,
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]
        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
        failures = 0
        # No limit configured means the playlist is never abandoned
        max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
        for i, entry_tuple in enumerate(entries, 1):
            playlist_index, entry = entry_tuple
            if 'playlist-index' in self.params.get('compat_opts', []):
                # Compat mode: derive the index from the position after
                # re-ordering instead of using the one saved before it
                playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                'playlist_index': playlist_index,
                'playlist_autonumber': i,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # A non-None result is the reason why the entry is skipped
            if self._match_entry(entry, incomplete=True) is not None:
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            if not entry_result:
                failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results

        # Write the updated info to json
        if _infojson_written and self._write_info_json(
                'updated playlist', ie_result,
                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
            return
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
1674
1675     @__handle_extraction_exceptions
1676     def __process_iterable_entry(self, entry, download, extra_info):
1677         return self.process_ie_result(
1678             entry, download=download, extra_info=extra_info)
1679
1680     def _build_format_filter(self, filter_spec):
1681         " Returns a function to filter the formats according to the filter_spec "
1682
1683         OPERATORS = {
1684             '<': operator.lt,
1685             '<=': operator.le,
1686             '>': operator.gt,
1687             '>=': operator.ge,
1688             '=': operator.eq,
1689             '!=': operator.ne,
1690         }
1691         operator_rex = re.compile(r'''(?x)\s*
1692             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1693             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1694             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1695             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1696         m = operator_rex.fullmatch(filter_spec)
1697         if m:
1698             try:
1699                 comparison_value = int(m.group('value'))
1700             except ValueError:
1701                 comparison_value = parse_filesize(m.group('value'))
1702                 if comparison_value is None:
1703                     comparison_value = parse_filesize(m.group('value') + 'B')
1704                 if comparison_value is None:
1705                     raise ValueError(
1706                         'Invalid value %r in format specification %r' % (
1707                             m.group('value'), filter_spec))
1708             op = OPERATORS[m.group('op')]
1709
1710         if not m:
1711             STR_OPERATORS = {
1712                 '=': operator.eq,
1713                 '^=': lambda attr, value: attr.startswith(value),
1714                 '$=': lambda attr, value: attr.endswith(value),
1715                 '*=': lambda attr, value: value in attr,
1716             }
1717             str_operator_rex = re.compile(r'''(?x)\s*
1718                 (?P<key>[a-zA-Z0-9._-]+)\s*
1719                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1720                 (?P<value>[a-zA-Z0-9._-]+)\s*
1721                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1722             m = str_operator_rex.fullmatch(filter_spec)
1723             if m:
1724                 comparison_value = m.group('value')
1725                 str_op = STR_OPERATORS[m.group('op')]
1726                 if m.group('negation'):
1727                     op = lambda attr, value: not str_op(attr, value)
1728                 else:
1729                     op = str_op
1730
1731         if not m:
1732             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1733
1734         def _filter(f):
1735             actual_value = f.get(m.group('key'))
1736             if actual_value is None:
1737                 return m.group('none_inclusive')
1738             return op(actual_value, comparison_value)
1739         return _filter
1740
1741     def _check_formats(self, formats):
1742         for f in formats:
1743             self.to_screen('[info] Testing format %s' % f['format_id'])
1744             path = self.get_output_path('temp')
1745             if not self._ensure_dir_exists(f'{path}/'):
1746                 continue
1747             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1748             temp_file.close()
1749             try:
1750                 success, _ = self.dl(temp_file.name, f, test=True)
1751             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1752                 success = False
1753             finally:
1754                 if os.path.exists(temp_file.name):
1755                     try:
1756                         os.remove(temp_file.name)
1757                     except OSError:
1758                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1759             if success:
1760                 yield f
1761             else:
1762                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1763
1764     def _default_format_spec(self, info_dict, download=True):
1765
1766         def can_merge():
1767             merger = FFmpegMergerPP(self)
1768             return merger.available and merger.can_merge()
1769
1770         prefer_best = (
1771             not self.params.get('simulate')
1772             and download
1773             and (
1774                 not can_merge()
1775                 or info_dict.get('is_live', False)
1776                 or self.outtmpl_dict['default'] == '-'))
1777         compat = (
1778             prefer_best
1779             or self.params.get('allow_multiple_audio_streams', False)
1780             or 'format-spec' in self.params.get('compat_opts', []))
1781
1782         return (
1783             'best/bestvideo+bestaudio' if prefer_best
1784             else 'bestvideo*+bestaudio/best' if not compat
1785             else 'bestvideo+bestaudio/best')
1786
1787     def build_format_selector(self, format_spec):
1788         def syntax_error(note, start):
1789             message = (
1790                 'Invalid format specification: '
1791                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1792             return SyntaxError(message)
1793
1794         PICKFIRST = 'PICKFIRST'
1795         MERGE = 'MERGE'
1796         SINGLE = 'SINGLE'
1797         GROUP = 'GROUP'
1798         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1799
1800         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1801                                   'video': self.params.get('allow_multiple_video_streams', False)}
1802
1803         check_formats = self.params.get('check_formats') == 'selected'
1804
1805         def _parse_filter(tokens):
1806             filter_parts = []
1807             for type, string, start, _, _ in tokens:
1808                 if type == tokenize.OP and string == ']':
1809                     return ''.join(filter_parts)
1810                 else:
1811                     filter_parts.append(string)
1812
1813         def _remove_unused_ops(tokens):
1814             # Remove operators that we don't use and join them with the surrounding strings
1815             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1816             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1817             last_string, last_start, last_end, last_line = None, None, None, None
1818             for type, string, start, end, line in tokens:
1819                 if type == tokenize.OP and string == '[':
1820                     if last_string:
1821                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1822                         last_string = None
1823                     yield type, string, start, end, line
1824                     # everything inside brackets will be handled by _parse_filter
1825                     for type, string, start, end, line in tokens:
1826                         yield type, string, start, end, line
1827                         if type == tokenize.OP and string == ']':
1828                             break
1829                 elif type == tokenize.OP and string in ALLOWED_OPS:
1830                     if last_string:
1831                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1832                         last_string = None
1833                     yield type, string, start, end, line
1834                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1835                     if not last_string:
1836                         last_string = string
1837                         last_start = start
1838                         last_end = end
1839                     else:
1840                         last_string += string
1841             if last_string:
1842                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1843
1844         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1845             selectors = []
1846             current_selector = None
1847             for type, string, start, _, _ in tokens:
1848                 # ENCODING is only defined in python 3.x
1849                 if type == getattr(tokenize, 'ENCODING', None):
1850                     continue
1851                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1852                     current_selector = FormatSelector(SINGLE, string, [])
1853                 elif type == tokenize.OP:
1854                     if string == ')':
1855                         if not inside_group:
1856                             # ')' will be handled by the parentheses group
1857                             tokens.restore_last_token()
1858                         break
1859                     elif inside_merge and string in ['/', ',']:
1860                         tokens.restore_last_token()
1861                         break
1862                     elif inside_choice and string == ',':
1863                         tokens.restore_last_token()
1864                         break
1865                     elif string == ',':
1866                         if not current_selector:
1867                             raise syntax_error('"," must follow a format selector', start)
1868                         selectors.append(current_selector)
1869                         current_selector = None
1870                     elif string == '/':
1871                         if not current_selector:
1872                             raise syntax_error('"/" must follow a format selector', start)
1873                         first_choice = current_selector
1874                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1875                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1876                     elif string == '[':
1877                         if not current_selector:
1878                             current_selector = FormatSelector(SINGLE, 'best', [])
1879                         format_filter = _parse_filter(tokens)
1880                         current_selector.filters.append(format_filter)
1881                     elif string == '(':
1882                         if current_selector:
1883                             raise syntax_error('Unexpected "("', start)
1884                         group = _parse_format_selection(tokens, inside_group=True)
1885                         current_selector = FormatSelector(GROUP, group, [])
1886                     elif string == '+':
1887                         if not current_selector:
1888                             raise syntax_error('Unexpected "+"', start)
1889                         selector_1 = current_selector
1890                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1891                         if not selector_2:
1892                             raise syntax_error('Expected a selector', start)
1893                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1894                     else:
1895                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1896                 elif type == tokenize.ENDMARKER:
1897                     break
1898             if current_selector:
1899                 selectors.append(current_selector)
1900             return selectors
1901
1902         def _merge(formats_pair):
1903             format_1, format_2 = formats_pair
1904
1905             formats_info = []
1906             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1907             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1908
1909             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1910                 get_no_more = {'video': False, 'audio': False}
1911                 for (i, fmt_info) in enumerate(formats_info):
1912                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1913                         formats_info.pop(i)
1914                         continue
1915                     for aud_vid in ['audio', 'video']:
1916                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1917                             if get_no_more[aud_vid]:
1918                                 formats_info.pop(i)
1919                                 break
1920                             get_no_more[aud_vid] = True
1921
1922             if len(formats_info) == 1:
1923                 return formats_info[0]
1924
1925             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1926             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1927
1928             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1929             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1930
1931             output_ext = self.params.get('merge_output_format')
1932             if not output_ext:
1933                 if the_only_video:
1934                     output_ext = the_only_video['ext']
1935                 elif the_only_audio and not video_fmts:
1936                     output_ext = the_only_audio['ext']
1937                 else:
1938                     output_ext = 'mkv'
1939
1940             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
1941
1942             new_dict = {
1943                 'requested_formats': formats_info,
1944                 'format': '+'.join(filtered('format')),
1945                 'format_id': '+'.join(filtered('format_id')),
1946                 'ext': output_ext,
1947                 'protocol': '+'.join(map(determine_protocol, formats_info)),
1948                 'language': '+'.join(orderedSet(filtered('language'))) or None,
1949                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
1950                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
1951                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
1952             }
1953
1954             if the_only_video:
1955                 new_dict.update({
1956                     'width': the_only_video.get('width'),
1957                     'height': the_only_video.get('height'),
1958                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1959                     'fps': the_only_video.get('fps'),
1960                     'dynamic_range': the_only_video.get('dynamic_range'),
1961                     'vcodec': the_only_video.get('vcodec'),
1962                     'vbr': the_only_video.get('vbr'),
1963                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1964                 })
1965
1966             if the_only_audio:
1967                 new_dict.update({
1968                     'acodec': the_only_audio.get('acodec'),
1969                     'abr': the_only_audio.get('abr'),
1970                     'asr': the_only_audio.get('asr'),
1971                 })
1972
1973             return new_dict
1974
1975         def _check_formats(formats):
1976             if not check_formats:
1977                 yield from formats
1978                 return
1979             yield from self._check_formats(formats)
1980
        def _build_selector_function(selector):
            """Turn a parsed selector (or a list of them) into a callable.

            The returned callable takes a context dict, reading its 'formats' and
            'incomplete_formats' keys, and produces the formats picked by the
            selector. For a single selector, its filters are applied to a copy of
            the context before the selection itself runs.
            """
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                # Concatenate the results of every comma-separated selector
                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                # Use the first alternative that yields at least one format
                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                # Merge every combination of the results of both sides;
                # each side works on its own deep copy of the context
                def selector_function(ctx):
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'][::-1])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        # Fold all formats into one, starting from the last one
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    # best/worst (b/w), optional video/audio (v/a),
                    # optional "*" modifier and optional ".N" index
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        # Additionally reject formats that have neither video nor audio
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Not a best/worst spec: match by known extension, else by format_id
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        # Reverse for "best" so that index 0 is the last format of the list
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            # Fewer matching formats than the requested index
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Filter a copy so that the caller's context is left untouched
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
2083
2084         stream = io.BytesIO(format_spec.encode('utf-8'))
2085         try:
2086             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2087         except tokenize.TokenError:
2088             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2089
2090         class TokenIterator(object):
2091             def __init__(self, tokens):
2092                 self.tokens = tokens
2093                 self.counter = 0
2094
2095             def __iter__(self):
2096                 return self
2097
2098             def __next__(self):
2099                 if self.counter >= len(self.tokens):
2100                     raise StopIteration()
2101                 value = self.tokens[self.counter]
2102                 self.counter += 1
2103                 return value
2104
2105             next = __next__
2106
2107             def restore_last_token(self):
2108                 self.counter -= 1
2109
2110         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2111         return _build_selector_function(parsed_selector)
2112
2113     def _calc_headers(self, info_dict):
2114         res = std_headers.copy()
2115
2116         add_headers = info_dict.get('http_headers')
2117         if add_headers:
2118             res.update(add_headers)
2119
2120         cookies = self._calc_cookies(info_dict)
2121         if cookies:
2122             res['Cookie'] = cookies
2123
2124         if 'X-Forwarded-For' not in res:
2125             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2126             if x_forwarded_for_ip:
2127                 res['X-Forwarded-For'] = x_forwarded_for_ip
2128
2129         return res
2130
2131     def _calc_cookies(self, info_dict):
2132         pr = sanitized_Request(info_dict['url'])
2133         self.cookiejar.add_cookie_header(pr)
2134         return pr.get_header('Cookie')
2135
2136     def _sort_thumbnails(self, thumbnails):
2137         thumbnails.sort(key=lambda t: (
2138             t.get('preference') if t.get('preference') is not None else -1,
2139             t.get('width') if t.get('width') is not None else -1,
2140             t.get('height') if t.get('height') is not None else -1,
2141             t.get('id') if t.get('id') is not None else '',
2142             t.get('url')))
2143
2144     def _sanitize_thumbnails(self, info_dict):
2145         thumbnails = info_dict.get('thumbnails')
2146         if thumbnails is None:
2147             thumbnail = info_dict.get('thumbnail')
2148             if thumbnail:
2149                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2150         if not thumbnails:
2151             return
2152
2153         def check_thumbnails(thumbnails):
2154             for t in thumbnails:
2155                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2156                 try:
2157                     self.urlopen(HEADRequest(t['url']))
2158                 except network_exceptions as err:
2159                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2160                     continue
2161                 yield t
2162
2163         self._sort_thumbnails(thumbnails)
2164         for i, t in enumerate(thumbnails):
2165             if t.get('id') is None:
2166                 t['id'] = '%d' % i
2167             if t.get('width') and t.get('height'):
2168                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2169             t['url'] = sanitize_url(t['url'])
2170
2171         if self.params.get('check_formats') is True:
2172             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2173         else:
2174             info_dict['thumbnails'] = thumbnails
2175
    def process_video_result(self, info_dict, download=True):
        """Sanitize the result of a single video, select its formats and process them.

        info_dict is modified in place: missing/derived fields are filled in,
        formats are validated and given unique ids, pre-processors are run and
        the requested formats are chosen with the format selector. When
        download is true, process_info is called once per selected format.

        Returns the updated info_dict, or None when only a listing
        (thumbnails/formats/subtitles) was requested.
        Raises ExtractorError if the "id" or "title" field is missing, or if no
        format matches the request and 'ignore_no_formats_error' is not set.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string value to a string, warning about the extractor bug
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every non-numeric value of a known numeric field with int_or_none
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Prefer the explicit thumbnail; otherwise use the last entry of the thumbnail list
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        # Derive missing dates (YYYYMMDD) from the corresponding timestamps
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Infer live_status from is_live/was_live when it is not given,
        # then fill in whichever of those keys is still unset
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and guess missing extensions from the URL
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # Remember whether any format has DRM before such formats are possibly dropped
        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # Maps each format_id to the list of formats sharing it
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    # Disambiguate duplicates by appending their index
                    format['format_id'] = '%s-%d' % (format_id, i)
                if format.get('ext') is None:
                    format['ext'] = determine_ext(format['url']).lower()
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

        # Fill in the derived per-format fields that are still missing
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            if (info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                # Estimate the size from duration and total bitrate
                format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)

            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if self.params.get('check_formats') is True:
            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # The pre-processors may have modified the formats
        formats = info_dict.get('formats', [info_dict])

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        # A listing option stops processing here only when 'simulate' is None
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                # Each selected format is processed on its own shallow copy of info_dict
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the selected best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2456
2457     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2458         """Select the requested subtitles and their format"""
2459         available_subs = {}
2460         if normal_subtitles and self.params.get('writesubtitles'):
2461             available_subs.update(normal_subtitles)
2462         if automatic_captions and self.params.get('writeautomaticsub'):
2463             for lang, cap_info in automatic_captions.items():
2464                 if lang not in available_subs:
2465                     available_subs[lang] = cap_info
2466
2467         if (not self.params.get('writesubtitles') and not
2468                 self.params.get('writeautomaticsub') or not
2469                 available_subs):
2470             return None
2471
2472         all_sub_langs = available_subs.keys()
2473         if self.params.get('allsubtitles', False):
2474             requested_langs = all_sub_langs
2475         elif self.params.get('subtitleslangs', False):
2476             # A list is used so that the order of languages will be the same as
2477             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2478             requested_langs = []
2479             for lang_re in self.params.get('subtitleslangs'):
2480                 if lang_re == 'all':
2481                     requested_langs.extend(all_sub_langs)
2482                     continue
2483                 discard = lang_re[0] == '-'
2484                 if discard:
2485                     lang_re = lang_re[1:]
2486                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2487                 if discard:
2488                     for lang in current_langs:
2489                         while lang in requested_langs:
2490                             requested_langs.remove(lang)
2491                 else:
2492                     requested_langs.extend(current_langs)
2493             requested_langs = orderedSet(requested_langs)
2494         elif 'en' in available_subs:
2495             requested_langs = ['en']
2496         else:
2497             requested_langs = [list(all_sub_langs)[0]]
2498         if requested_langs:
2499             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2500
2501         formats_query = self.params.get('subtitlesformat', 'best')
2502         formats_preference = formats_query.split('/') if formats_query else []
2503         subs = {}
2504         for lang in requested_langs:
2505             formats = available_subs.get(lang)
2506             if formats is None:
2507                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2508                 continue
2509             for ext in formats_preference:
2510                 if ext == 'best':
2511                     f = formats[-1]
2512                     break
2513                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2514                 if matches:
2515                     f = matches[-1]
2516                     break
2517             else:
2518                 f = formats[-1]
2519                 self.report_warning(
2520                     'No subtitle format found matching "%s" for language %s, '
2521                     'using %s' % (formats_query, lang, f['ext']))
2522             subs[lang] = f
2523         return subs
2524
2525     def __forced_printings(self, info_dict, filename, incomplete):
2526         def print_mandatory(field, actual_field=None):
2527             if actual_field is None:
2528                 actual_field = field
2529             if (self.params.get('force%s' % field, False)
2530                     and (not incomplete or info_dict.get(actual_field) is not None)):
2531                 self.to_stdout(info_dict[actual_field])
2532
2533         def print_optional(field):
2534             if (self.params.get('force%s' % field, False)
2535                     and info_dict.get(field) is not None):
2536                 self.to_stdout(info_dict[field])
2537
2538         info_dict = info_dict.copy()
2539         if filename is not None:
2540             info_dict['filename'] = filename
2541         if info_dict.get('requested_formats') is not None:
2542             # For RTMP URLs, also include the playpath
2543             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2544         elif 'url' in info_dict:
2545             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2546
2547         if self.params.get('forceprint') or self.params.get('forcejson'):
2548             self.post_extract(info_dict)
2549         for tmpl in self.params.get('forceprint', []):
2550             mobj = re.match(r'\w+(=?)$', tmpl)
2551             if mobj and mobj.group(1):
2552                 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2553             elif mobj:
2554                 tmpl = '%({})s'.format(tmpl)
2555             self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2556
2557         print_mandatory('title')
2558         print_mandatory('id')
2559         print_mandatory('url', 'urls')
2560         print_optional('thumbnail')
2561         print_optional('description')
2562         print_optional('filename')
2563         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2564             self.to_stdout(formatSeconds(info_dict['duration']))
2565         print_mandatory('format')
2566
2567         if self.params.get('forcejson'):
2568             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2569
2570     def dl(self, name, info, subtitle=False, test=False):
2571         if not info.get('url'):
2572             self.raise_no_formats(info, True)
2573
2574         if test:
2575             verbose = self.params.get('verbose')
2576             params = {
2577                 'test': True,
2578                 'quiet': self.params.get('quiet') or not verbose,
2579                 'verbose': verbose,
2580                 'noprogress': not verbose,
2581                 'nopart': True,
2582                 'skip_unavailable_fragments': False,
2583                 'keep_fragments': False,
2584                 'overwrites': True,
2585                 '_no_ytdl_file': True,
2586             }
2587         else:
2588             params = self.params
2589         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2590         if not test:
2591             for ph in self._progress_hooks:
2592                 fd.add_progress_hook(ph)
2593             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2594             self.write_debug('Invoking downloader on "%s"' % urls)
2595
2596         new_info = copy.deepcopy(self._copy_infodict(info))
2597         if new_info.get('http_headers') is None:
2598             new_info['http_headers'] = self._calc_headers(new_info)
2599         return fd.download(name, new_info, subtitle)
2600
2601     def process_info(self, info_dict):
2602         """Process a single resolved IE result."""
2603
2604         assert info_dict.get('_type', 'video') == 'video'
2605
2606         max_downloads = self.params.get('max_downloads')
2607         if max_downloads is not None:
2608             if self._num_downloads >= int(max_downloads):
2609                 raise MaxDownloadsReached()
2610
2611         # TODO: backward compatibility, to be removed
2612         info_dict['fulltitle'] = info_dict['title']
2613
2614         if 'format' not in info_dict and 'ext' in info_dict:
2615             info_dict['format'] = info_dict['ext']
2616
2617         if self._match_entry(info_dict) is not None:
2618             return
2619
2620         self.post_extract(info_dict)
2621         self._num_downloads += 1
2622
2623         # info_dict['_filename'] needs to be set for backward compatibility
2624         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2625         temp_filename = self.prepare_filename(info_dict, 'temp')
2626         files_to_move = {}
2627
2628         # Forced printings
2629         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2630
2631         if self.params.get('simulate'):
2632             if self.params.get('force_write_download_archive', False):
2633                 self.record_download_archive(info_dict)
2634             # Do nothing else if in simulate mode
2635             return
2636
2637         if full_filename is None:
2638             return
2639         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2640             return
2641         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2642             return
2643
2644         if self._write_description('video', info_dict,
2645                                    self.prepare_filename(info_dict, 'description')) is None:
2646             return
2647
2648         sub_files = self._write_subtitles(info_dict, temp_filename)
2649         if sub_files is None:
2650             return
2651         files_to_move.update(dict(sub_files))
2652
2653         thumb_files = self._write_thumbnails(
2654             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2655         if thumb_files is None:
2656             return
2657         files_to_move.update(dict(thumb_files))
2658
2659         infofn = self.prepare_filename(info_dict, 'infojson')
2660         _infojson_written = self._write_info_json('video', info_dict, infofn)
2661         if _infojson_written:
2662             info_dict['infojson_filename'] = infofn
2663             # For backward compatability, even though it was a private field
2664             info_dict['__infojson_filename'] = infofn
2665         elif _infojson_written is None:
2666             return
2667
2668         # Note: Annotations are deprecated
2669         annofn = None
2670         if self.params.get('writeannotations', False):
2671             annofn = self.prepare_filename(info_dict, 'annotation')
2672         if annofn:
2673             if not self._ensure_dir_exists(encodeFilename(annofn)):
2674                 return
2675             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2676                 self.to_screen('[info] Video annotations are already present')
2677             elif not info_dict.get('annotations'):
2678                 self.report_warning('There are no annotations to write.')
2679             else:
2680                 try:
2681                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2682                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2683                         annofile.write(info_dict['annotations'])
2684                 except (KeyError, TypeError):
2685                     self.report_warning('There are no annotations to write.')
2686                 except (OSError, IOError):
2687                     self.report_error('Cannot write annotations file: ' + annofn)
2688                     return
2689
2690         # Write internet shortcut files
2691         def _write_link_file(link_type):
2692             if 'webpage_url' not in info_dict:
2693                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2694                 return False
2695             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2696             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2697                 return False
2698             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2699                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2700                 return True
2701             try:
2702                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2703                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2704                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2705                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2706                     if link_type == 'desktop':
2707                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2708                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2709             except (OSError, IOError):
2710                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2711                 return False
2712             return True
2713
2714         write_links = {
2715             'url': self.params.get('writeurllink'),
2716             'webloc': self.params.get('writewebloclink'),
2717             'desktop': self.params.get('writedesktoplink'),
2718         }
2719         if self.params.get('writelink'):
2720             link_type = ('webloc' if sys.platform == 'darwin'
2721                          else 'desktop' if sys.platform.startswith('linux')
2722                          else 'url')
2723             write_links[link_type] = True
2724
2725         if any(should_write and not _write_link_file(link_type)
2726                for link_type, should_write in write_links.items()):
2727             return
2728
2729         try:
2730             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2731         except PostProcessingError as err:
2732             self.report_error('Preprocessing: %s' % str(err))
2733             return
2734
2735         must_record_download_archive = False
2736         if self.params.get('skip_download', False):
2737             info_dict['filepath'] = temp_filename
2738             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2739             info_dict['__files_to_move'] = files_to_move
2740             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2741         else:
2742             # Download
2743             info_dict.setdefault('__postprocessors', [])
2744             try:
2745
2746                 def existing_file(*filepaths):
2747                     ext = info_dict.get('ext')
2748                     final_ext = self.params.get('final_ext', ext)
2749                     existing_files = []
2750                     for file in orderedSet(filepaths):
2751                         if final_ext != ext:
2752                             converted = replace_extension(file, final_ext, ext)
2753                             if os.path.exists(encodeFilename(converted)):
2754                                 existing_files.append(converted)
2755                         if os.path.exists(encodeFilename(file)):
2756                             existing_files.append(file)
2757
2758                     if not existing_files or self.params.get('overwrites', False):
2759                         for file in orderedSet(existing_files):
2760                             self.report_file_delete(file)
2761                             os.remove(encodeFilename(file))
2762                         return None
2763
2764                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2765                     return existing_files[0]
2766
2767                 success = True
2768                 if info_dict.get('requested_formats') is not None:
2769
2770                     def compatible_formats(formats):
2771                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2772                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2773                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2774                         if len(video_formats) > 2 or len(audio_formats) > 2:
2775                             return False
2776
2777                         # Check extension
2778                         exts = set(format.get('ext') for format in formats)
2779                         COMPATIBLE_EXTS = (
2780                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2781                             set(('webm',)),
2782                         )
2783                         for ext_sets in COMPATIBLE_EXTS:
2784                             if ext_sets.issuperset(exts):
2785                                 return True
2786                         # TODO: Check acodec/vcodec
2787                         return False
2788
2789                     requested_formats = info_dict['requested_formats']
2790                     old_ext = info_dict['ext']
2791                     if self.params.get('merge_output_format') is None:
2792                         if not compatible_formats(requested_formats):
2793                             info_dict['ext'] = 'mkv'
2794                             self.report_warning(
2795                                 'Requested formats are incompatible for merge and will be merged into mkv')
2796                         if (info_dict['ext'] == 'webm'
2797                                 and info_dict.get('thumbnails')
2798                                 # check with type instead of pp_key, __name__, or isinstance
2799                                 # since we dont want any custom PPs to trigger this
2800                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2801                             info_dict['ext'] = 'mkv'
2802                             self.report_warning(
2803                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2804                     new_ext = info_dict['ext']
2805
2806                     def correct_ext(filename, ext=new_ext):
2807                         if filename == '-':
2808                             return filename
2809                         filename_real_ext = os.path.splitext(filename)[1][1:]
2810                         filename_wo_ext = (
2811                             os.path.splitext(filename)[0]
2812                             if filename_real_ext in (old_ext, new_ext)
2813                             else filename)
2814                         return '%s.%s' % (filename_wo_ext, ext)
2815
2816                     # Ensure filename always has a correct extension for successful merge
2817                     full_filename = correct_ext(full_filename)
2818                     temp_filename = correct_ext(temp_filename)
2819                     dl_filename = existing_file(full_filename, temp_filename)
2820                     info_dict['__real_download'] = False
2821
2822                     if dl_filename is not None:
2823                         self.report_file_already_downloaded(dl_filename)
2824                     elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
2825                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2826                         success, real_download = self.dl(temp_filename, info_dict)
2827                         info_dict['__real_download'] = real_download
2828                     else:
2829                         downloaded = []
2830                         merger = FFmpegMergerPP(self)
2831                         if self.params.get('allow_unplayable_formats'):
2832                             self.report_warning(
2833                                 'You have requested merging of multiple formats '
2834                                 'while also allowing unplayable formats to be downloaded. '
2835                                 'The formats won\'t be merged to prevent data corruption.')
2836                         elif not merger.available:
2837                             self.report_warning(
2838                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2839                                 'The formats won\'t be merged.')
2840
2841                         if temp_filename == '-':
2842                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
2843                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2844                                       else 'but ffmpeg is not installed')
2845                             self.report_warning(
2846                                 f'You have requested downloading multiple formats to stdout {reason}. '
2847                                 'The formats will be streamed one after the other')
2848                             fname = temp_filename
2849                         for f in requested_formats:
2850                             new_info = dict(info_dict)
2851                             del new_info['requested_formats']
2852                             new_info.update(f)
2853                             if temp_filename != '-':
2854                                 fname = prepend_extension(
2855                                     correct_ext(temp_filename, new_info['ext']),
2856                                     'f%s' % f['format_id'], new_info['ext'])
2857                                 if not self._ensure_dir_exists(fname):
2858                                     return
2859                                 f['filepath'] = fname
2860                                 downloaded.append(fname)
2861                             partial_success, real_download = self.dl(fname, new_info)
2862                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2863                             success = success and partial_success
2864                         if merger.available and not self.params.get('allow_unplayable_formats'):
2865                             info_dict['__postprocessors'].append(merger)
2866                             info_dict['__files_to_merge'] = downloaded
2867                             # Even if there were no downloads, it is being merged only now
2868                             info_dict['__real_download'] = True
2869                         else:
2870                             for file in downloaded:
2871                                 files_to_move[file] = None
2872                 else:
2873                     # Just a single file
2874                     dl_filename = existing_file(full_filename, temp_filename)
2875                     if dl_filename is None or dl_filename == temp_filename:
2876                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2877                         # So we should try to resume the download
2878                         success, real_download = self.dl(temp_filename, info_dict)
2879                         info_dict['__real_download'] = real_download
2880                     else:
2881                         self.report_file_already_downloaded(dl_filename)
2882
2883                 dl_filename = dl_filename or temp_filename
2884                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2885
2886             except network_exceptions as err:
2887                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2888                 return
2889             except (OSError, IOError) as err:
2890                 raise UnavailableVideoError(err)
2891             except (ContentTooShortError, ) as err:
2892                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2893                 return
2894
2895             if success and full_filename != '-':
2896
2897                 def fixup():
2898                     do_fixup = True
2899                     fixup_policy = self.params.get('fixup')
2900                     vid = info_dict['id']
2901
2902                     if fixup_policy in ('ignore', 'never'):
2903                         return
2904                     elif fixup_policy == 'warn':
2905                         do_fixup = False
2906                     elif fixup_policy != 'force':
2907                         assert fixup_policy in ('detect_or_warn', None)
2908                         if not info_dict.get('__real_download'):
2909                             do_fixup = False
2910
2911                     def ffmpeg_fixup(cndn, msg, cls):
2912                         if not cndn:
2913                             return
2914                         if not do_fixup:
2915                             self.report_warning(f'{vid}: {msg}')
2916                             return
2917                         pp = cls(self)
2918                         if pp.available:
2919                             info_dict['__postprocessors'].append(pp)
2920                         else:
2921                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2922
2923                     stretched_ratio = info_dict.get('stretched_ratio')
2924                     ffmpeg_fixup(
2925                         stretched_ratio not in (1, None),
2926                         f'Non-uniform pixel ratio {stretched_ratio}',
2927                         FFmpegFixupStretchedPP)
2928
2929                     ffmpeg_fixup(
2930                         (info_dict.get('requested_formats') is None
2931                          and info_dict.get('container') == 'm4a_dash'
2932                          and info_dict.get('ext') == 'm4a'),
2933                         'writing DASH m4a. Only some players support this container',
2934                         FFmpegFixupM4aPP)
2935
2936                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
2937                     downloader = downloader.__name__ if downloader else None
2938                     ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
2939                                  'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
2940                                  FFmpegFixupM3u8PP)
2941                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
2942                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
2943
2944                 fixup()
2945                 try:
2946                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2947                 except PostProcessingError as err:
2948                     self.report_error('Postprocessing: %s' % str(err))
2949                     return
2950                 try:
2951                     for ph in self._post_hooks:
2952                         ph(info_dict['filepath'])
2953                 except Exception as err:
2954                     self.report_error('post hooks: %s' % str(err))
2955                     return
2956                 must_record_download_archive = True
2957
2958         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2959             self.record_download_archive(info_dict)
2960         max_downloads = self.params.get('max_downloads')
2961         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2962             raise MaxDownloadsReached()
2963
2964     def __download_wrapper(self, func):
2965         @functools.wraps(func)
2966         def wrapper(*args, **kwargs):
2967             try:
2968                 res = func(*args, **kwargs)
2969             except UnavailableVideoError as e:
2970                 self.report_error(e)
2971             except DownloadCancelled as e:
2972                 self.to_screen(f'[info] {e}')
2973                 raise
2974             else:
2975                 if self.params.get('dump_single_json', False):
2976                     self.post_extract(res)
2977                     self.to_stdout(json.dumps(self.sanitize_info(res)))
2978         return wrapper
2979
2980     def download(self, url_list):
2981         """Download a given list of URLs."""
2982         url_list = variadic(url_list)  # Passing a single URL is a common mistake
2983         outtmpl = self.outtmpl_dict['default']
2984         if (len(url_list) > 1
2985                 and outtmpl != '-'
2986                 and '%' not in outtmpl
2987                 and self.params.get('max_downloads') != 1):
2988             raise SameFileError(outtmpl)
2989
2990         for url in url_list:
2991             self.__download_wrapper(self.extract_info)(
2992                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2993
2994         return self._download_retcode
2995
2996     def download_with_info_file(self, info_filename):
2997         with contextlib.closing(fileinput.FileInput(
2998                 [info_filename], mode='r',
2999                 openhook=fileinput.hook_encoded('utf-8'))) as f:
3000             # FileInput doesn't have a read method, we can't call json.load
3001             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3002         try:
3003             self.__download_wrapper(self.process_ie_result)(info, download=True)
3004         except (DownloadError, EntryNotInPlaylist, ThrottledDownload) as e:
3005             if not isinstance(e, EntryNotInPlaylist):
3006                 self.to_stderr('\r')
3007             webpage_url = info.get('webpage_url')
3008             if webpage_url is not None:
3009                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3010                 return self.download([webpage_url])
3011             else:
3012                 raise
3013         return self._download_retcode
3014
3015     @staticmethod
3016     def sanitize_info(info_dict, remove_private_keys=False):
3017         ''' Sanitize the infodict for converting to json '''
3018         if info_dict is None:
3019             return info_dict
3020         info_dict.setdefault('epoch', int(time.time()))
3021         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3022         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3023         if remove_private_keys:
3024             remove_keys |= {
3025                 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
3026                 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3027             }
3028             empty_values = (None, {}, [], set(), tuple())
3029             reject = lambda k, v: k not in keep_keys and (
3030                 k.startswith('_') or k in remove_keys or v in empty_values)
3031         else:
3032             reject = lambda k, v: k in remove_keys
3033         filter_fn = lambda obj: (
3034             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
3035             else obj if not isinstance(obj, dict)
3036             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
3037         return filter_fn(info_dict)
3038
3039     @staticmethod
3040     def filter_requested_info(info_dict, actually_filter=True):
3041         ''' Alias of sanitize_info for backward compatibility '''
3042         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3043
3044     def run_pp(self, pp, infodict):
3045         files_to_delete = []
3046         if '__files_to_move' not in infodict:
3047             infodict['__files_to_move'] = {}
3048         try:
3049             files_to_delete, infodict = pp.run(infodict)
3050         except PostProcessingError as e:
3051             # Must be True and not 'only_download'
3052             if self.params.get('ignoreerrors') is True:
3053                 self.report_error(e)
3054                 return infodict
3055             raise
3056
3057         if not files_to_delete:
3058             return infodict
3059         if self.params.get('keepvideo', False):
3060             for f in files_to_delete:
3061                 infodict['__files_to_move'].setdefault(f, '')
3062         else:
3063             for old_filename in set(files_to_delete):
3064                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3065                 try:
3066                     os.remove(encodeFilename(old_filename))
3067                 except (IOError, OSError):
3068                     self.report_warning('Unable to remove downloaded original file')
3069                 if old_filename in infodict['__files_to_move']:
3070                     del infodict['__files_to_move'][old_filename]
3071         return infodict
3072
3073     @staticmethod
3074     def post_extract(info_dict):
3075         def actual_post_extract(info_dict):
3076             if info_dict.get('_type') in ('playlist', 'multi_video'):
3077                 for video_dict in info_dict.get('entries', {}):
3078                     actual_post_extract(video_dict or {})
3079                 return
3080
3081             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3082             extra = post_extractor().items()
3083             info_dict.update(extra)
3084             info_dict.pop('__post_extractor', None)
3085
3086             original_infodict = info_dict.get('__original_infodict') or {}
3087             original_infodict.update(extra)
3088             original_infodict.pop('__post_extractor', None)
3089
3090         actual_post_extract(info_dict or {})
3091
3092     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3093         info = dict(ie_info)
3094         info['__files_to_move'] = files_to_move or {}
3095         for pp in self._pps[key]:
3096             info = self.run_pp(pp, info)
3097         return info, info.pop('__files_to_move', None)
3098
3099     def post_process(self, filename, ie_info, files_to_move=None):
3100         """Run all the postprocessors on the given file."""
3101         info = dict(ie_info)
3102         info['filepath'] = filename
3103         info['__files_to_move'] = files_to_move or {}
3104
3105         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3106             info = self.run_pp(pp, info)
3107         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3108         del info['__files_to_move']
3109         for pp in self._pps['after_move']:
3110             info = self.run_pp(pp, info)
3111         return info
3112
3113     def _make_archive_id(self, info_dict):
3114         video_id = info_dict.get('id')
3115         if not video_id:
3116             return
3117         # Future-proof against any change in case
3118         # and backwards compatibility with prior versions
3119         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3120         if extractor is None:
3121             url = str_or_none(info_dict.get('url'))
3122             if not url:
3123                 return
3124             # Try to find matching extractor for the URL and take its ie_key
3125             for ie_key, ie in self._ies.items():
3126                 if ie.suitable(url):
3127                     extractor = ie_key
3128                     break
3129             else:
3130                 return
3131         return '%s %s' % (extractor.lower(), video_id)
3132
3133     def in_download_archive(self, info_dict):
3134         fn = self.params.get('download_archive')
3135         if fn is None:
3136             return False
3137
3138         vid_id = self._make_archive_id(info_dict)
3139         if not vid_id:
3140             return False  # Incomplete video information
3141
3142         return vid_id in self.archive
3143
3144     def record_download_archive(self, info_dict):
3145         fn = self.params.get('download_archive')
3146         if fn is None:
3147             return
3148         vid_id = self._make_archive_id(info_dict)
3149         assert vid_id
3150         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3151             archive_file.write(vid_id + '\n')
3152         self.archive.add(vid_id)
3153
3154     @staticmethod
3155     def format_resolution(format, default='unknown'):
3156         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3157             return 'audio only'
3158         if format.get('resolution') is not None:
3159             return format['resolution']
3160         if format.get('width') and format.get('height'):
3161             return '%dx%d' % (format['width'], format['height'])
3162         elif format.get('height'):
3163             return '%sp' % format['height']
3164         elif format.get('width'):
3165             return '%dx?' % format['width']
3166         return default
3167
3168     def _format_note(self, fdict):
3169         res = ''
3170         if fdict.get('ext') in ['f4f', 'f4m']:
3171             res += '(unsupported) '
3172         if fdict.get('language'):
3173             if res:
3174                 res += ' '
3175             res += '[%s] ' % fdict['language']
3176         if fdict.get('format_note') is not None:
3177             res += fdict['format_note'] + ' '
3178         if fdict.get('tbr') is not None:
3179             res += '%4dk ' % fdict['tbr']
3180         if fdict.get('container') is not None:
3181             if res:
3182                 res += ', '
3183             res += '%s container' % fdict['container']
3184         if (fdict.get('vcodec') is not None
3185                 and fdict.get('vcodec') != 'none'):
3186             if res:
3187                 res += ', '
3188             res += fdict['vcodec']
3189             if fdict.get('vbr') is not None:
3190                 res += '@'
3191         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3192             res += 'video@'
3193         if fdict.get('vbr') is not None:
3194             res += '%4dk' % fdict['vbr']
3195         if fdict.get('fps') is not None:
3196             if res:
3197                 res += ', '
3198             res += '%sfps' % fdict['fps']
3199         if fdict.get('acodec') is not None:
3200             if res:
3201                 res += ', '
3202             if fdict['acodec'] == 'none':
3203                 res += 'video only'
3204             else:
3205                 res += '%-5s' % fdict['acodec']
3206         elif fdict.get('abr') is not None:
3207             if res:
3208                 res += ', '
3209             res += 'audio'
3210         if fdict.get('abr') is not None:
3211             res += '@%3dk' % fdict['abr']
3212         if fdict.get('asr') is not None:
3213             res += ' (%5dHz)' % fdict['asr']
3214         if fdict.get('filesize') is not None:
3215             if res:
3216                 res += ', '
3217             res += format_bytes(fdict['filesize'])
3218         elif fdict.get('filesize_approx') is not None:
3219             if res:
3220                 res += ', '
3221             res += '~' + format_bytes(fdict['filesize_approx'])
3222         return res
3223
3224     def _list_format_headers(self, *headers):
3225         if self.params.get('listformats_table', True) is not False:
3226             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3227         return headers
3228
3229     def list_formats(self, info_dict):
3230         formats = info_dict.get('formats', [info_dict])
3231         new_format = self.params.get('listformats_table', True) is not False
3232         if new_format:
3233             delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3234             table = [
3235                 [
3236                     self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3237                     format_field(f, 'ext'),
3238                     format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3239                     format_field(f, 'fps', '\t%d'),
3240                     format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3241                     delim,
3242                     format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3243                     format_field(f, 'tbr', '\t%dk'),
3244                     shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
3245                     delim,
3246                     format_field(f, 'vcodec', default='unknown').replace(
3247                         'none',
3248                         'images' if f.get('acodec') == 'none'
3249                         else self._format_screen('audio only', self.Styles.SUPPRESS)),
3250                     format_field(f, 'vbr', '\t%dk'),
3251                     format_field(f, 'acodec', default='unknown').replace(
3252                         'none',
3253                         '' if f.get('vcodec') == 'none'
3254                         else self._format_screen('video only', self.Styles.SUPPRESS)),
3255                     format_field(f, 'abr', '\t%dk'),
3256                     format_field(f, 'asr', '\t%dHz'),
3257                     join_nonempty(
3258                         self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3259                         format_field(f, 'language', '[%s]'),
3260                         join_nonempty(
3261                             format_field(f, 'format_note'),
3262                             format_field(f, 'container', ignore=(None, f.get('ext'))),
3263                             delim=', '),
3264                         delim=' '),
3265                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3266             header_line = self._list_format_headers(
3267                 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3268                 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3269         else:
3270             table = [
3271                 [
3272                     format_field(f, 'format_id'),
3273                     format_field(f, 'ext'),
3274                     self.format_resolution(f),
3275                     self._format_note(f)]
3276                 for f in formats
3277                 if f.get('preference') is None or f['preference'] >= -1000]
3278             header_line = ['format code', 'extension', 'resolution', 'note']
3279
3280         self.to_screen(
3281             '[info] Available formats for %s:' % info_dict['id'])
3282         self.to_stdout(render_table(
3283             header_line, table,
3284             extra_gap=(0 if new_format else 1),
3285             hide_empty=new_format,
3286             delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3287
3288     def list_thumbnails(self, info_dict):
3289         thumbnails = list(info_dict.get('thumbnails'))
3290         if not thumbnails:
3291             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3292             return
3293
3294         self.to_screen(
3295             '[info] Thumbnails for %s:' % info_dict['id'])
3296         self.to_stdout(render_table(
3297             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3298             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3299
3300     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3301         if not subtitles:
3302             self.to_screen('%s has no %s' % (video_id, name))
3303             return
3304         self.to_screen(
3305             'Available %s for %s:' % (name, video_id))
3306
3307         def _row(lang, formats):
3308             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3309             if len(set(names)) == 1:
3310                 names = [] if names[0] == 'unknown' else names[:1]
3311             return [lang, ', '.join(names), ', '.join(exts)]
3312
3313         self.to_stdout(render_table(
3314             self._list_format_headers('Language', 'Name', 'Formats'),
3315             [_row(lang, formats) for lang, formats in subtitles.items()],
3316             hide_empty=True))
3317
3318     def urlopen(self, req):
3319         """ Start an HTTP download """
3320         if isinstance(req, compat_basestring):
3321             req = sanitized_Request(req)
3322         return self._opener.open(req, timeout=self._socket_timeout)
3323
3324     def print_debug_header(self):
3325         if not self.params.get('verbose'):
3326             return
3327
3328         def get_encoding(stream):
3329             ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3330             if not supports_terminal_sequences(stream):
3331                 ret += ' (No ANSI)'
3332             return ret
3333
3334         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3335             locale.getpreferredencoding(),
3336             sys.getfilesystemencoding(),
3337             get_encoding(self._screen_file), get_encoding(self._err_file),
3338             self.get_encoding())
3339
3340         logger = self.params.get('logger')
3341         if logger:
3342             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3343             write_debug(encoding_str)
3344         else:
3345             write_string(f'[debug] {encoding_str}\n', encoding=None)
3346             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3347
3348         source = detect_variant()
3349         write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
3350         if not _LAZY_LOADER:
3351             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3352                 write_debug('Lazy loading extractors is forcibly disabled')
3353             else:
3354                 write_debug('Lazy loading extractors is disabled')
3355         if plugin_extractors or plugin_postprocessors:
3356             write_debug('Plugins: %s' % [
3357                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3358                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3359         if self.params.get('compat_opts'):
3360             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3361         try:
3362             sp = Popen(
3363                 ['git', 'rev-parse', '--short', 'HEAD'],
3364                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3365                 cwd=os.path.dirname(os.path.abspath(__file__)))
3366             out, err = sp.communicate_or_kill()
3367             out = out.decode().strip()
3368             if re.match('[0-9a-f]+', out):
3369                 write_debug('Git HEAD: %s' % out)
3370         except Exception:
3371             try:
3372                 sys.exc_clear()
3373             except Exception:
3374                 pass
3375
3376         def python_implementation():
3377             impl_name = platform.python_implementation()
3378             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3379                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3380             return impl_name
3381
3382         write_debug('Python version %s (%s %s) - %s' % (
3383             platform.python_version(),
3384             python_implementation(),
3385             platform.architecture()[0],
3386             platform_name()))
3387
3388         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3389         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3390         if ffmpeg_features:
3391             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3392
3393         exe_versions['rtmpdump'] = rtmpdump_version()
3394         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3395         exe_str = ', '.join(
3396             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3397         ) or 'none'
3398         write_debug('exe versions: %s' % exe_str)
3399
3400         from .downloader.websocket import has_websockets
3401         from .postprocessor.embedthumbnail import has_mutagen
3402         from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3403
3404         lib_str = join_nonempty(
3405             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3406             KEYRING_AVAILABLE and 'keyring',
3407             has_mutagen and 'mutagen',
3408             SQLITE_AVAILABLE and 'sqlite',
3409             has_websockets and 'websockets',
3410             delim=', ') or 'none'
3411         write_debug('Optional libraries: %s' % lib_str)
3412
3413         proxy_map = {}
3414         for handler in self._opener.handlers:
3415             if hasattr(handler, 'proxies'):
3416                 proxy_map.update(handler.proxies)
3417         write_debug(f'Proxy map: {proxy_map}')
3418
3419         # Not implemented
3420         if False and self.params.get('call_home'):
3421             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3422             write_debug('Public IP address: %s' % ipaddr)
3423             latest_version = self.urlopen(
3424                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3425             if version_tuple(latest_version) > version_tuple(__version__):
3426                 self.report_warning(
3427                     'You are using an outdated version (newest version: %s)! '
3428                     'See https://yt-dl.org/update if you need help updating.' %
3429                     latest_version)
3430
3431     def _setup_opener(self):
3432         timeout_val = self.params.get('socket_timeout')
3433         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3434
3435         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3436         opts_cookiefile = self.params.get('cookiefile')
3437         opts_proxy = self.params.get('proxy')
3438
3439         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3440
3441         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3442         if opts_proxy is not None:
3443             if opts_proxy == '':
3444                 proxies = {}
3445             else:
3446                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3447         else:
3448             proxies = compat_urllib_request.getproxies()
3449             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3450             if 'http' in proxies and 'https' not in proxies:
3451                 proxies['https'] = proxies['http']
3452         proxy_handler = PerRequestProxyHandler(proxies)
3453
3454         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3455         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3456         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3457         redirect_handler = YoutubeDLRedirectHandler()
3458         data_handler = compat_urllib_request_DataHandler()
3459
3460         # When passing our own FileHandler instance, build_opener won't add the
3461         # default FileHandler and allows us to disable the file protocol, which
3462         # can be used for malicious purposes (see
3463         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3464         file_handler = compat_urllib_request.FileHandler()
3465
3466         def file_open(*args, **kwargs):
3467             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3468         file_handler.file_open = file_open
3469
3470         opener = compat_urllib_request.build_opener(
3471             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3472
3473         # Delete the default user-agent header, which would otherwise apply in
3474         # cases where our custom HTTP handler doesn't come into play
3475         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3476         opener.addheaders = []
3477         self._opener = opener
3478
3479     def encode(self, s):
3480         if isinstance(s, bytes):
3481             return s  # Already encoded
3482
3483         try:
3484             return s.encode(self.get_encoding())
3485         except UnicodeEncodeError as err:
3486             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3487             raise
3488
3489     def get_encoding(self):
3490         encoding = self.params.get('encoding')
3491         if encoding is None:
3492             encoding = preferredencoding()
3493         return encoding
3494
3495     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3496         ''' Write infojson and returns True = written, False = skip, None = error '''
3497         if overwrite is None:
3498             overwrite = self.params.get('overwrites', True)
3499         if not self.params.get('writeinfojson'):
3500             return False
3501         elif not infofn:
3502             self.write_debug(f'Skipping writing {label} infojson')
3503             return False
3504         elif not self._ensure_dir_exists(infofn):
3505             return None
3506         elif not overwrite and os.path.exists(infofn):
3507             self.to_screen(f'[info] {label.title()} metadata is already present')
3508         else:
3509             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3510             try:
3511                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3512             except (OSError, IOError):
3513                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3514                 return None
3515         return True
3516
3517     def _write_description(self, label, ie_result, descfn):
3518         ''' Write description and returns True = written, False = skip, None = error '''
3519         if not self.params.get('writedescription'):
3520             return False
3521         elif not descfn:
3522             self.write_debug(f'Skipping writing {label} description')
3523             return False
3524         elif not self._ensure_dir_exists(descfn):
3525             return None
3526         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3527             self.to_screen(f'[info] {label.title()} description is already present')
3528         elif ie_result.get('description') is None:
3529             self.report_warning(f'There\'s no {label} description to write')
3530             return False
3531         else:
3532             try:
3533                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3534                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3535                     descfile.write(ie_result['description'])
3536             except (OSError, IOError):
3537                 self.report_error(f'Cannot write {label} description file {descfn}')
3538                 return None
3539         return True
3540
3541     def _write_subtitles(self, info_dict, filename):
3542         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3543         ret = []
3544         subtitles = info_dict.get('requested_subtitles')
3545         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3546             # subtitles download errors are already managed as troubles in relevant IE
3547             # that way it will silently go on when used with unsupporting IE
3548             return ret
3549
3550         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3551         if not sub_filename_base:
3552             self.to_screen('[info] Skipping writing video subtitles')
3553             return ret
3554         for sub_lang, sub_info in subtitles.items():
3555             sub_format = sub_info['ext']
3556             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3557             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3558             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3559                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3560                 sub_info['filepath'] = sub_filename
3561                 ret.append((sub_filename, sub_filename_final))
3562                 continue
3563
3564             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3565             if sub_info.get('data') is not None:
3566                 try:
3567                     # Use newline='' to prevent conversion of newline characters
3568                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3569                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3570                         subfile.write(sub_info['data'])
3571                     sub_info['filepath'] = sub_filename
3572                     ret.append((sub_filename, sub_filename_final))
3573                     continue
3574                 except (OSError, IOError):
3575                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3576                     return None
3577
3578             try:
3579                 sub_copy = sub_info.copy()
3580                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3581                 self.dl(sub_filename, sub_copy, subtitle=True)
3582                 sub_info['filepath'] = sub_filename
3583                 ret.append((sub_filename, sub_filename_final))
3584             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3585                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3586                 continue
3587         return ret
3588
3589     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3590         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3591         write_all = self.params.get('write_all_thumbnails', False)
3592         thumbnails, ret = [], []
3593         if write_all or self.params.get('writethumbnail', False):
3594             thumbnails = info_dict.get('thumbnails') or []
3595         multiple = write_all and len(thumbnails) > 1
3596
3597         if thumb_filename_base is None:
3598             thumb_filename_base = filename
3599         if thumbnails and not thumb_filename_base:
3600             self.write_debug(f'Skipping writing {label} thumbnail')
3601             return ret
3602
3603         for t in thumbnails[::-1]:
3604             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3605             thumb_display_id = f'{label} thumbnail {t["id"]}'
3606             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3607             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3608
3609             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3610                 ret.append((thumb_filename, thumb_filename_final))
3611                 t['filepath'] = thumb_filename
3612                 self.to_screen('[info] %s is already present' % (
3613                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3614             else:
3615                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3616                 try:
3617                     uf = self.urlopen(t['url'])
3618                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3619                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3620                         shutil.copyfileobj(uf, thumbf)
3621                     ret.append((thumb_filename, thumb_filename_final))
3622                     t['filepath'] = thumb_filename
3623                 except network_exceptions as err:
3624                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3625             if ret and not write_all:
3626                 break
3627         return ret