#!/usr/bin/env python3
# coding: utf-8

from __future__ import absolute_import, unicode_literals

import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import platform
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import random
import unicodedata

from string import ascii_letters

from .compat import (
    compat_basestring,
    compat_get_terminal_size,
    compat_kwargs,
    compat_numeric_types,
    compat_os_name,
    compat_pycrypto_AES,
    compat_shlex_quote,
    compat_str,
    compat_tokenize_tokenize,
    compat_urllib_error,
    compat_urllib_request,
    compat_urllib_request_DataHandler,
    windows_enable_vt_mode,
)
from .cookies import load_cookies
from .utils import (
    age_restricted,
    args_to_str,
    ContentTooShortError,
    date_from_str,
    DateRange,
    DEFAULT_OUTTMPL,
    determine_ext,
    determine_protocol,
    DOT_DESKTOP_LINK_TEMPLATE,
    DOT_URL_LINK_TEMPLATE,
    DOT_WEBLOC_LINK_TEMPLATE,
    DownloadError,
    encode_compat_str,
    encodeFilename,
    EntryNotInPlaylist,
    error_to_compat_str,
    ExistingVideoReached,
    expand_path,
    ExtractorError,
    float_or_none,
    format_bytes,
    format_field,
    formatSeconds,
    GeoRestrictedError,
    HEADRequest,
    int_or_none,
    iri_to_uri,
    ISO3166Utils,
    LazyList,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    MaxDownloadsReached,
    network_exceptions,
    orderedSet,
    OUTTMPL_TYPES,
    PagedList,
    parse_filesize,
    PerRequestProxyHandler,
    platform_name,
    Popen,
    PostProcessingError,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    RejectedVideoReached,
    render_table,
    replace_extension,
    SameFileError,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    TERMINAL_SEQUENCES,
    ThrottledDownload,
    to_high_limit_path,
    traverse_obj,
    try_get,
    UnavailableVideoError,
    url_basename,
    variadic,
    version_tuple,
    write_json_file,
    write_string,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
)
from .cache import Cache
from .extractor import (
    gen_extractor_classes,
    get_info_extractor,
    _LAZY_LOADER,
    _PLUGIN_CLASSES as plugin_extractors
)
from .extractor.openload import PhantomJSwrapper
from .downloader import (
    FFmpegFD,
    get_suitable_downloader,
    shorten_protocol_name
)
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
    get_postprocessor,
    EmbedThumbnailPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    MoveFilesAfterDownloadPP,
    _PLUGIN_CLASSES as plugin_postprocessors
)
from .update import detect_variant
from .version import __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how
    to extract all the needed information (a task that InfoExtractors
    do), it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A list of templates to force print
    forceurl:          Force printing final URL. (Deprecated)
    forcetitle:        Force printing title. (Deprecated)
    forceid:           Force printing ID. (Deprecated)
    forcethumbnail:    Force printing thumbnail URL. (Deprecated)
    forcedescription:  Force printing description. (Deprecated)
    forcefilename:     Force printing final filename. (Deprecated)
    forceduration:     Force printing duration. (Deprecated)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. see "FORMAT SELECTION" for more details.
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       How to sort the video formats. see "Sorting Formats"
                       for more details.
    format_sort_force: Force the given format_sort. see "Sorting Formats"
                       for more details.
    allow_multiple_video_streams:   Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams:   Allow multiple audio streams to be merged
                       into a single file
    check_formats      Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none)
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home'
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    playlistrandom:    Download playlist items in random order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails:  Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Deprecated - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages. Eg: ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    cookiefile:        File name where cookies should be read from and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser and the profile
                       name/path from where cookies are loaded.
                       Eg: ('chrome', ) or ('vivaldi', 'default')
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    geo_verification_proxy:  URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Can be one of
                               pre_process|before_dl|post_process|after_move.
                               Assumed to be 'post_process' if not given
    post_hooks:        Deprecated - Register a custom postprocessor instead
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    postprocessor_hooks:  A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                                 Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: Extension to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted. "merge_output_format" is
                       replaced by this extension when given
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging. (BROKEN)
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval:Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
                       or {'m3u8': 'ffmpeg'} instead.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs.
                       Refer __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
    noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args.

    The following options are used by the post processors:
    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg. (avconv support is deprecated)
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       Eg: {'youtube': {'skip': ['dash', 'hls']}}
    youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    """

    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    ))

    _format_selection_exts = {
        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
        'video': {'mp4', 'flv', 'webm', '3gp'},
        'storyboards': {'mhtml'},
    }

    params = None
    _ies = {}
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    _printed_messages = set()
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self._ies = {}
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        windows_enable_vt_mode()
        # FIXME: This will break if we ever print color to stdout
        self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file)

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                '         If you experience any issues while using this option, '
                f'{self._color_text("DO NOT", "red")} open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('warnings', []):
            self.report_warning(msg)

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            None if self.params.get('format') is None
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            pp_class = get_postprocessor(pp_def.pop('key'))
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug('Loading archive file %r\n' % fn)
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))
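        # Note (illustrative, not in the original source): the archive file is
        # plain text with one "extractor video_id" entry per line, e.g.:
        #   youtube dQw4w9WgXcQ
        # Lines are stripped and stored verbatim in the self.archive set.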

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s\n' %
                args_to_str(correct_argv))
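
    # e.g. (illustrative): for argv ['-abcdefghij'] (a hypothetical 11-char ID
    # starting with '-'), the suggested invocation is "yt-dlp -- -abcdefghij",
    # so the ID is no longer parsed as an option.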

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def _get_info_extractor_class(self, ie_key):
        ie = self._ies.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)
            self.add_info_extractor(ie)
        return ie

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies_instances list; if there's no instance, it will create a new
        one and add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=False):
        """Print message to stdout"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not quiet or self.params.get('verbose'):
            self._write_string(
                '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
                self._err_file if quiet else self._screen_file)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)

    def save_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate'):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate'):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode"""
        self.to_stdout(
            message, skip_eol, quiet=self.params.get('quiet', False))

    def _color_text(self, text, color):
        if self.params.get('no_color'):
            return text
        return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}'

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._color_text("WARNING:", "yellow")} {message}', only_once)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._color_text("ERROR:", "red")} {message}', tb)

    def write_debug(self, message, only_once=False):
        '''Log debug message or print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = '[debug] %s' % message
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False):
        has_drm = info.get('__has_drm')
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
        expected = self.params.get('ignore_no_formats_error')
        if forced or not expected:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        outtmpl_dict = self.params.get('outtmpl', {})
        if not isinstance(outtmpl_dict, dict):
            outtmpl_dict = {'default': outtmpl_dict}
        # Remove spaces in the default template
        if self.params.get('restrictfilenames'):
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
        else:
            sanitize = lambda x: x
        outtmpl_dict.update({
            k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
            if outtmpl_dict.get(k) is None})
        for key, val in outtmpl_dict.items():
            if isinstance(val, bytes):
                self.report_warning(
                    'Parameter outtmpl is bytes, but should be a unicode string. '
                    'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
        return outtmpl_dict

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            path = encodeFilename(path, True).decode(preferredencoding())
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')
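
    # e.g. (illustrative): for outtmpl '%%(ext)s in $HOME', only '$HOME' is
    # expanded; the shielded '%%' survives the round trip and is restored
    # intact for the template-substitution step.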

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)
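
    # e.g. (illustrative): escape_outtmpl('%(title)s 100%') == '%(title)s 100%%'
    # -- a lone '%' is doubled while valid '%(key)' fields are left intact.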

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err
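
    # e.g. (illustrative): validate_outtmpl('%(title)s.%(ext)s') returns None,
    # while a malformed template such as '%(title)' returns the ValueError
    # raised by the trial substitution above.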

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        for key in ('__original_infodict', '__postprocessors'):
            info_dict.pop(key, None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
            'playlist_autonumber': len(str(info_dict.get('n_entries') or '')),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:\.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|)]+)?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
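
        # Illustrative template fields whose key (the part inside '%(...)')
        # is parsed by this regex; the field names are assumptions for the
        # example, not special-cased here:
        #   '%(title|Unknown)s'     -> 'default' group supplies a fallback
        #   '%(playlist_index+1)d'  -> 'maths' group applies an offset
        #   '%(epoch>%Y-%m-%d)s'    -> 'strf_format' group formats a timestamp
        #   '%(title,id)s'          -> 'alternate' group tries fields in order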

        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, default = None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value, fmt = compat_shlex_quote(str(value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                opts = outer_mobj.group('conversion') or ''
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict
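
    # e.g. (illustrative, with a hypothetical minimal info_dict):
    #   ydl.evaluate_outtmpl('%(id)s.%(ext)s', {'id': 'abc123', 'ext': 'mp4'})
    #   -> 'abc123.mp4'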

    def _prepare_filename(self, info_dict, tmpl_type='default'):
        try:
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
            filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)

            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if filename and force_ext is not None:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', warn=False):
        """Generate the output filename."""

        filename = self._prepare_filename(info_dict, dir_type or 'default')
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)

    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is not None:
                    return ret
            return None

        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason

    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if extra_info is None:
            extra_info = {}

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = {ie_key: self._get_info_extractor_class(ie_key)}
        else:
            ies = self._ies

        for ie_key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                               ie_key, temp_id))
                break
            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)

    def __handle_extraction_exceptions(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except ThrottledDownload:
                self.to_stderr('\r')
                self.report_warning('The download speed is below throttle limit. Re-extracting data')
                return wrapper(self, *args, **kwargs)
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
                raise
            except Exception as e:
                if self.params.get('ignoreerrors'):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper

    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if extra_info.get('original_url'):
            ie_result.setdefault('original_url', extra_info['original_url'])
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result

    def add_default_extra_info(self, ie_result, ie, url):
        if url is not None:
            self.add_extra_info(ie_result, {
                'webpage_url': url,
                'original_url': url,
                'webpage_url_basename': url_basename(url),
            })
        if ie is not None:
            self.add_extra_info(ie_result, {
                'extractor': ie.IE_NAME,
                'extractor_key': ie.ie_key(),
            })

    def process_ie_result(self, ie_result, download=True, extra_info=None):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
1341 if extra_info is None:
1342 extra_info = {}
1343 result_type = ie_result.get('_type', 'video')
1344
1345 if result_type in ('url', 'url_transparent'):
1346 ie_result['url'] = sanitize_url(ie_result['url'])
1347 if ie_result.get('original_url'):
1348 extra_info.setdefault('original_url', ie_result['original_url'])
1349
1350 extract_flat = self.params.get('extract_flat', False)
1351 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1352 or extract_flat is True):
1353 info_copy = ie_result.copy()
1354 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1355 if ie and not ie_result.get('id'):
1356 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1357 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1358 self.add_extra_info(info_copy, extra_info)
1359 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1360 if self.params.get('force_write_download_archive', False):
1361 self.record_download_archive(info_copy)
1362 return ie_result
1363
1364 if result_type == 'video':
1365 self.add_extra_info(ie_result, extra_info)
1366 ie_result = self.process_video_result(ie_result, download=download)
1367 additional_urls = (ie_result or {}).get('additional_urls')
1368 if additional_urls:
1369 # TODO: Improve MetadataParserPP to allow setting a list
1370 if isinstance(additional_urls, compat_str):
1371 additional_urls = [additional_urls]
1372 self.to_screen(
1373 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1374 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1375 ie_result['additional_entries'] = [
1376 self.extract_info(
1377 url, download, extra_info,
1378 force_generic_extractor=self.params.get('force_generic_extractor'))
1379 for url in additional_urls
1380 ]
1381 return ie_result
1382 elif result_type == 'url':
1383 # We have to add extra_info to the results because it may be
1384 # contained in a playlist
1385 return self.extract_info(
1386 ie_result['url'], download,
1387 ie_key=ie_result.get('ie_key'),
1388 extra_info=extra_info)
1389 elif result_type == 'url_transparent':
1390 # Use the information from the embedding page
1391 info = self.extract_info(
1392 ie_result['url'], ie_key=ie_result.get('ie_key'),
1393 extra_info=extra_info, download=False, process=False)
1394
1395 # extract_info may return None when ignoreerrors is enabled and
1396 # extraction failed with an error, don't crash and return early
1397 # in this case
1398 if not info:
1399 return info
1400
1401 force_properties = dict(
1402 (k, v) for k, v in ie_result.items() if v is not None)
1403 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1404 if f in force_properties:
1405 del force_properties[f]
1406 new_result = info.copy()
1407 new_result.update(force_properties)
1408
1409 # Extracted info may not be a video result (i.e.
1410 # info.get('_type', 'video') != video) but rather an url or
1411 # url_transparent. In such cases outer metadata (from ie_result)
1412 # should be propagated to inner one (info). For this to happen
1413 # _type of info should be overridden with url_transparent. This
1414 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1415 if new_result.get('_type') == 'url':
1416 new_result['_type'] = 'url_transparent'
1417
1418 return self.process_ie_result(
1419 new_result, download=download, extra_info=extra_info)
1420 elif result_type in ('playlist', 'multi_video'):
1421 # Protect from infinite recursion due to recursively nested playlists
1422 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1423 webpage_url = ie_result['webpage_url']
1424 if webpage_url in self._playlist_urls:
1425 self.to_screen(
1426 '[download] Skipping already downloaded playlist: %s'
1427 % (ie_result.get('title') or ie_result.get('id')))
1428 return
1429
1430 self._playlist_level += 1
1431 self._playlist_urls.add(webpage_url)
1432 self._sanitize_thumbnails(ie_result)
1433 try:
1434 return self.__process_playlist(ie_result, download)
1435 finally:
1436 self._playlist_level -= 1
1437 if not self._playlist_level:
1438 self._playlist_urls.clear()
1439 elif result_type == 'compat_list':
1440 self.report_warning(
1441 'Extractor %s returned a compat_list result. '
1442 'It needs to be updated.' % ie_result.get('extractor'))
1443
1444 def _fixup(r):
1445 self.add_extra_info(r, {
1446 'extractor': ie_result['extractor'],
1447 'webpage_url': ie_result['webpage_url'],
1448 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1449 'extractor_key': ie_result['extractor_key'],
1450 })
1451 return r
1452 ie_result['entries'] = [
1453 self.process_ie_result(_fixup(r), download, extra_info)
1454 for r in ie_result['entries']
1455 ]
1456 return ie_result
1457 else:
1458 raise Exception('Invalid result type: %s' % result_type)
1459
1460 def _ensure_dir_exists(self, path):
1461 return make_dir(path, self.report_error)
1462
1463 def __process_playlist(self, ie_result, download):
1464 # We process each entry in the playlist
1465 playlist = ie_result.get('title') or ie_result.get('id')
1466 self.to_screen('[download] Downloading playlist: %s' % playlist)
1467
1468 if 'entries' not in ie_result:
1469 raise EntryNotInPlaylist()
1470 incomplete_entries = bool(ie_result.get('requested_entries'))
1471 if incomplete_entries:
1472 def fill_missing_entries(entries, indexes):
1473 ret = [None] * max(indexes) # max(indexes), not max(*indexes), so a single requested entry also works
1474 for i, entry in zip(indexes, entries):
1475 ret[i - 1] = entry
1476 return ret
1477 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1478
1479 playlist_results = []
1480
1481 playliststart = self.params.get('playliststart', 1)
1482 playlistend = self.params.get('playlistend')
1483 # For backwards compatibility, interpret -1 as whole list
1484 if playlistend == -1:
1485 playlistend = None
1486
1487 playlistitems_str = self.params.get('playlist_items')
1488 playlistitems = None
1489 if playlistitems_str is not None:
1490 def iter_playlistitems(format):
1491 for string_segment in format.split(','):
1492 if '-' in string_segment:
1493 start, end = string_segment.split('-')
1494 for item in range(int(start), int(end) + 1):
1495 yield int(item)
1496 else:
1497 yield int(string_segment)
1498 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
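# Illustrative note (editorial, not part of the upstream source): each
# comma-separated segment of a --playlist-items spec is expanded, with
# 'a-b' ranges inclusive on both ends and duplicates dropped by orderedSet:
#   list(iter_playlistitems('1-3,7,2'))       -> [1, 2, 3, 7, 2]
#   orderedSet(iter_playlistitems('1-3,7,2')) -> [1, 2, 3, 7]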
1499
1500 ie_entries = ie_result['entries']
1501 msg = (
1502 'Downloading %d videos' if not isinstance(ie_entries, list)
1503 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1504
1505 if isinstance(ie_entries, list):
1506 def get_entry(i):
1507 return ie_entries[i - 1]
1508 else:
1509 if not isinstance(ie_entries, PagedList):
1510 ie_entries = LazyList(ie_entries)
1511
1512 def get_entry(i):
1513 return YoutubeDL.__handle_extraction_exceptions(
1514 lambda self, i: ie_entries[i - 1]
1515 )(self, i)
1516
1517 entries = []
1518 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1519 for i in items:
1520 if i == 0:
1521 continue
1522 if playlistitems is None and playlistend is not None and playlistend < i:
1523 break
1524 entry = None
1525 try:
1526 entry = get_entry(i)
1527 if entry is None:
1528 raise EntryNotInPlaylist()
1529 except (IndexError, EntryNotInPlaylist):
1530 if incomplete_entries:
1531 raise EntryNotInPlaylist()
1532 elif not playlistitems:
1533 break
1534 entries.append(entry)
1535 try:
1536 if entry is not None:
1537 self._match_entry(entry, incomplete=True, silent=True)
1538 except (ExistingVideoReached, RejectedVideoReached):
1539 break
1540 ie_result['entries'] = entries
1541
1542 # Save playlist_index before re-ordering
1543 entries = [
1544 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1545 for i, entry in enumerate(entries, 1)
1546 if entry is not None]
1547 n_entries = len(entries)
1548
1549 if not playlistitems and (playliststart or playlistend):
1550 playlistitems = list(range(playliststart, playliststart + n_entries))
1551 ie_result['requested_entries'] = playlistitems
1552
1553 if self.params.get('allow_playlist_files', True):
1554 ie_copy = {
1555 'playlist': playlist,
1556 'playlist_id': ie_result.get('id'),
1557 'playlist_title': ie_result.get('title'),
1558 'playlist_uploader': ie_result.get('uploader'),
1559 'playlist_uploader_id': ie_result.get('uploader_id'),
1560 'playlist_index': 0,
1561 }
1562 ie_copy.update(dict(ie_result))
1563
1564 if self._write_info_json('playlist', ie_result,
1565 self.prepare_filename(ie_copy, 'pl_infojson')) is None:
1566 return
1567 if self._write_description('playlist', ie_result,
1568 self.prepare_filename(ie_copy, 'pl_description')) is None:
1569 return
1570 # TODO: This should be passed to ThumbnailsConvertor if necessary
1571 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1572
1573 if self.params.get('playlistreverse', False):
1574 entries = entries[::-1]
1575 if self.params.get('playlistrandom', False):
1576 random.shuffle(entries)
1577
1578 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1579
1580 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1581 failures = 0
1582 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1583 for i, entry_tuple in enumerate(entries, 1):
1584 playlist_index, entry = entry_tuple
1585 if 'playlist-index' in self.params.get('compat_opts', []):
1586 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1587 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1588 # This __x_forwarded_for_ip thing is a bit ugly but requires
1589 # minimal changes
1590 if x_forwarded_for:
1591 entry['__x_forwarded_for_ip'] = x_forwarded_for
1592 extra = {
1593 'n_entries': n_entries,
1594 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1595 'playlist_index': playlist_index,
1596 'playlist_autonumber': i,
1597 'playlist': playlist,
1598 'playlist_id': ie_result.get('id'),
1599 'playlist_title': ie_result.get('title'),
1600 'playlist_uploader': ie_result.get('uploader'),
1601 'playlist_uploader_id': ie_result.get('uploader_id'),
1602 'extractor': ie_result['extractor'],
1603 'webpage_url': ie_result['webpage_url'],
1604 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1605 'extractor_key': ie_result['extractor_key'],
1606 }
1607
1608 if self._match_entry(entry, incomplete=True) is not None:
1609 continue
1610
1611 entry_result = self.__process_iterable_entry(entry, download, extra)
1612 if not entry_result:
1613 failures += 1
1614 if failures >= max_failures:
1615 self.report_error(
1616 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1617 break
1618 # TODO: skip failed (empty) entries?
1619 playlist_results.append(entry_result)
1620 ie_result['entries'] = playlist_results
1621 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1622 return ie_result
1623
1624 @__handle_extraction_exceptions
1625 def __process_iterable_entry(self, entry, download, extra_info):
1626 return self.process_ie_result(
1627 entry, download=download, extra_info=extra_info)
1628
1629 def _build_format_filter(self, filter_spec):
1630 " Returns a function to filter the formats according to the filter_spec "
1631
1632 OPERATORS = {
1633 '<': operator.lt,
1634 '<=': operator.le,
1635 '>': operator.gt,
1636 '>=': operator.ge,
1637 '=': operator.eq,
1638 '!=': operator.ne,
1639 }
1640 operator_rex = re.compile(r'''(?x)\s*
1641 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1642 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1643 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1644 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1645 m = operator_rex.fullmatch(filter_spec)
1646 if m:
1647 try:
1648 comparison_value = int(m.group('value'))
1649 except ValueError:
1650 comparison_value = parse_filesize(m.group('value'))
1651 if comparison_value is None:
1652 comparison_value = parse_filesize(m.group('value') + 'B')
1653 if comparison_value is None:
1654 raise ValueError(
1655 'Invalid value %r in format specification %r' % (
1656 m.group('value'), filter_spec))
1657 op = OPERATORS[m.group('op')]
1658
1659 if not m:
1660 STR_OPERATORS = {
1661 '=': operator.eq,
1662 '^=': lambda attr, value: attr.startswith(value),
1663 '$=': lambda attr, value: attr.endswith(value),
1664 '*=': lambda attr, value: value in attr,
1665 }
1666 str_operator_rex = re.compile(r'''(?x)\s*
1667 (?P<key>[a-zA-Z0-9._-]+)\s*
1668 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1669 (?P<value>[a-zA-Z0-9._-]+)\s*
1670 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1671 m = str_operator_rex.fullmatch(filter_spec)
1672 if m:
1673 comparison_value = m.group('value')
1674 str_op = STR_OPERATORS[m.group('op')]
1675 if m.group('negation'):
1676 op = lambda attr, value: not str_op(attr, value)
1677 else:
1678 op = str_op
1679
1680 if not m:
1681 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1682
1683 def _filter(f):
1684 actual_value = f.get(m.group('key'))
1685 if actual_value is None:
1686 return m.group('none_inclusive')
1687 return op(actual_value, comparison_value)
1688 return _filter
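# Illustrative sketch (editorial, not part of the upstream source; the
# format dict below is hypothetical): how a few filter specs evaluate:
#   f = {'height': 720, 'language': 'en-US'}
#   self._build_format_filter('height<=1080')(f)  -> True  (720 <= 1080)
#   self._build_format_filter('language^=en')(f)  -> True  (prefix match)
#   self._build_format_filter('fps>30?')(f)       -> '?'   (truthy, since
#       'fps' is missing and the '?' none-inclusive marker was given)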
1689
1690 def _default_format_spec(self, info_dict, download=True):
1691
1692 def can_merge():
1693 merger = FFmpegMergerPP(self)
1694 return merger.available and merger.can_merge()
1695
1696 prefer_best = (
1697 not self.params.get('simulate')
1698 and download
1699 and (
1700 not can_merge()
1701 or info_dict.get('is_live', False)
1702 or self.outtmpl_dict['default'] == '-'))
1703 compat = (
1704 prefer_best
1705 or self.params.get('allow_multiple_audio_streams', False)
1706 or 'format-spec' in self.params.get('compat_opts', []))
1707
1708 return (
1709 'best/bestvideo+bestaudio' if prefer_best
1710 else 'bestvideo*+bestaudio/best' if not compat
1711 else 'bestvideo+bestaudio/best')
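# Illustrative note (editorial, not part of the upstream source): with
# ffmpeg available, a non-live video and a regular output template, this
# resolves to 'bestvideo*+bestaudio/best'; when merging is impossible
# (no ffmpeg, a live stream, or output to '-') it degrades to
# 'best/bestvideo+bestaudio'; compat options yield 'bestvideo+bestaudio/best'.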
1712
1713 def build_format_selector(self, format_spec):
1714 def syntax_error(note, start):
1715 message = (
1716 'Invalid format specification: '
1717 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1718 return SyntaxError(message)
1719
1720 PICKFIRST = 'PICKFIRST'
1721 MERGE = 'MERGE'
1722 SINGLE = 'SINGLE'
1723 GROUP = 'GROUP'
1724 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1725
1726 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1727 'video': self.params.get('allow_multiple_video_streams', False)}
1728
1729 check_formats = self.params.get('check_formats')
1730
1731 def _parse_filter(tokens):
1732 filter_parts = []
1733 for type, string, start, _, _ in tokens:
1734 if type == tokenize.OP and string == ']':
1735 return ''.join(filter_parts)
1736 else:
1737 filter_parts.append(string)
1738
1739 def _remove_unused_ops(tokens):
1740 # Remove operators that we don't use and join them with the surrounding strings
1741 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1742 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1743 last_string, last_start, last_end, last_line = None, None, None, None
1744 for type, string, start, end, line in tokens:
1745 if type == tokenize.OP and string == '[':
1746 if last_string:
1747 yield tokenize.NAME, last_string, last_start, last_end, last_line
1748 last_string = None
1749 yield type, string, start, end, line
1750 # everything inside brackets will be handled by _parse_filter
1751 for type, string, start, end, line in tokens:
1752 yield type, string, start, end, line
1753 if type == tokenize.OP and string == ']':
1754 break
1755 elif type == tokenize.OP and string in ALLOWED_OPS:
1756 if last_string:
1757 yield tokenize.NAME, last_string, last_start, last_end, last_line
1758 last_string = None
1759 yield type, string, start, end, line
1760 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1761 if not last_string:
1762 last_string = string
1763 last_start = start
1764 last_end = end
1765 else:
1766 last_string += string
1767 if last_string:
1768 yield tokenize.NAME, last_string, last_start, last_end, last_line
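# Illustrative example (editorial, not part of the upstream source): for
# the spec 'mp4-baseline[height<=480]', the tokens 'mp4', '-' and
# 'baseline' are fused into the single NAME token 'mp4-baseline' (since
# '-' is not in ALLOWED_OPS), while '[' ... ']' and everything between
# them pass through unchanged for _parse_filter to consume.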
1769
1770 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1771 selectors = []
1772 current_selector = None
1773 for type, string, start, _, _ in tokens:
1774 # ENCODING is only defined in python 3.x
1775 if type == getattr(tokenize, 'ENCODING', None):
1776 continue
1777 elif type in [tokenize.NAME, tokenize.NUMBER]:
1778 current_selector = FormatSelector(SINGLE, string, [])
1779 elif type == tokenize.OP:
1780 if string == ')':
1781 if not inside_group:
1782 # ')' will be handled by the parentheses group
1783 tokens.restore_last_token()
1784 break
1785 elif inside_merge and string in ['/', ',']:
1786 tokens.restore_last_token()
1787 break
1788 elif inside_choice and string == ',':
1789 tokens.restore_last_token()
1790 break
1791 elif string == ',':
1792 if not current_selector:
1793 raise syntax_error('"," must follow a format selector', start)
1794 selectors.append(current_selector)
1795 current_selector = None
1796 elif string == '/':
1797 if not current_selector:
1798 raise syntax_error('"/" must follow a format selector', start)
1799 first_choice = current_selector
1800 second_choice = _parse_format_selection(tokens, inside_choice=True)
1801 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1802 elif string == '[':
1803 if not current_selector:
1804 current_selector = FormatSelector(SINGLE, 'best', [])
1805 format_filter = _parse_filter(tokens)
1806 current_selector.filters.append(format_filter)
1807 elif string == '(':
1808 if current_selector:
1809 raise syntax_error('Unexpected "("', start)
1810 group = _parse_format_selection(tokens, inside_group=True)
1811 current_selector = FormatSelector(GROUP, group, [])
1812 elif string == '+':
1813 if not current_selector:
1814 raise syntax_error('Unexpected "+"', start)
1815 selector_1 = current_selector
1816 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1817 if not selector_2:
1818 raise syntax_error('Expected a selector', start)
1819 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1820 else:
1821 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1822 elif type == tokenize.ENDMARKER:
1823 break
1824 if current_selector:
1825 selectors.append(current_selector)
1826 return selectors
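# Illustrative sketch (editorial, not part of the upstream source): the
# spec 'bv*+ba/b' parses, roughly, into
#   PICKFIRST( MERGE(SINGLE 'bv*', SINGLE 'ba'), SINGLE 'b' )
# where each alternative produced by a recursive call comes wrapped in a
# list, which _build_selector_function handles as the ','-separated case.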
1827
1828 def _merge(formats_pair):
1829 format_1, format_2 = formats_pair
1830
1831 formats_info = []
1832 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1833 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1834
1835 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1836 get_no_more = {'video': False, 'audio': False}
1837 for (i, fmt_info) in enumerate(formats_info):
1838 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1839 formats_info.pop(i)
1840 continue
1841 for aud_vid in ['audio', 'video']:
1842 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1843 if get_no_more[aud_vid]:
1844 formats_info.pop(i)
1845 break
1846 get_no_more[aud_vid] = True
1847
1848 if len(formats_info) == 1:
1849 return formats_info[0]
1850
1851 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1852 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1853
1854 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1855 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1856
1857 output_ext = self.params.get('merge_output_format')
1858 if not output_ext:
1859 if the_only_video:
1860 output_ext = the_only_video['ext']
1861 elif the_only_audio and not video_fmts:
1862 output_ext = the_only_audio['ext']
1863 else:
1864 output_ext = 'mkv'
1865
1866 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
1867
1868 new_dict = {
1869 'requested_formats': formats_info,
1870 'format': '+'.join(filtered('format')),
1871 'format_id': '+'.join(filtered('format_id')),
1872 'ext': output_ext,
1873 'protocol': '+'.join(map(determine_protocol, formats_info)),
1874 'language': '+'.join(orderedSet(filtered('language'))),
1875 'format_note': '+'.join(orderedSet(filtered('format_note'))),
1876 'filesize_approx': sum(filtered('filesize', 'filesize_approx')),
1877 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
1878 }
1879
1880 if the_only_video:
1881 new_dict.update({
1882 'width': the_only_video.get('width'),
1883 'height': the_only_video.get('height'),
1884 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1885 'fps': the_only_video.get('fps'),
1886 'vcodec': the_only_video.get('vcodec'),
1887 'vbr': the_only_video.get('vbr'),
1888 'stretched_ratio': the_only_video.get('stretched_ratio'),
1889 })
1890
1891 if the_only_audio:
1892 new_dict.update({
1893 'acodec': the_only_audio.get('acodec'),
1894 'abr': the_only_audio.get('abr'),
1895 'asr': the_only_audio.get('asr'),
1896 })
1897
1898 return new_dict
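# Illustrative example (editorial, not part of the upstream source; both
# format dicts are hypothetical): merging a video-only and an audio-only
# format
#   v = {'format_id': '137', 'ext': 'mp4', 'vcodec': 'avc1',
#        'acodec': 'none', 'tbr': 1000, 'protocol': 'https'}
#   a = {'format_id': '140', 'ext': 'm4a', 'vcodec': 'none',
#        'acodec': 'mp4a', 'tbr': 128, 'protocol': 'https'}
# gives format_id '137+140', ext 'mp4' (from the only video stream, as no
# merge_output_format is set), protocol 'https+https', tbr 1128, and both
# inputs preserved under 'requested_formats'.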
1899
1900 def _check_formats(formats):
1901 if not check_formats:
1902 yield from formats
1903 return
1904 for f in formats:
1905 self.to_screen('[info] Testing format %s' % f['format_id'])
1906 temp_file = tempfile.NamedTemporaryFile(
1907 suffix='.tmp', delete=False,
1908 dir=self.get_output_path('temp') or None)
1909 temp_file.close()
1910 try:
1911 success, _ = self.dl(temp_file.name, f, test=True)
1912 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1913 success = False
1914 finally:
1915 if os.path.exists(temp_file.name):
1916 try:
1917 os.remove(temp_file.name)
1918 except OSError:
1919 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1920 if success:
1921 yield f
1922 else:
1923 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1924
1925 def _build_selector_function(selector):
1926 if isinstance(selector, list): # ,
1927 fs = [_build_selector_function(s) for s in selector]
1928
1929 def selector_function(ctx):
1930 for f in fs:
1931 yield from f(ctx)
1932 return selector_function
1933
1934 elif selector.type == GROUP: # ()
1935 selector_function = _build_selector_function(selector.selector)
1936
1937 elif selector.type == PICKFIRST: # /
1938 fs = [_build_selector_function(s) for s in selector.selector]
1939
1940 def selector_function(ctx):
1941 for f in fs:
1942 picked_formats = list(f(ctx))
1943 if picked_formats:
1944 return picked_formats
1945 return []
1946
1947 elif selector.type == MERGE: # +
1948 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1949
1950 def selector_function(ctx):
1951 for pair in itertools.product(
1952 selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1953 yield _merge(pair)
1954
1955 elif selector.type == SINGLE: # atom
1956 format_spec = selector.selector or 'best'
1957
1958 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
1959 if format_spec == 'all':
1960 def selector_function(ctx):
1961 yield from _check_formats(ctx['formats'])
1962 elif format_spec == 'mergeall':
1963 def selector_function(ctx):
1964 formats = list(_check_formats(ctx['formats']))
1965 if not formats:
1966 return
1967 merged_format = formats[-1]
1968 for f in formats[-2::-1]:
1969 merged_format = _merge((merged_format, f))
1970 yield merged_format
1971
1972 else:
1973 format_fallback, format_reverse, format_idx = False, True, 1
1974 mobj = re.match(
1975 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1976 format_spec)
1977 if mobj is not None:
1978 format_idx = int_or_none(mobj.group('n'), default=1)
1979 format_reverse = mobj.group('bw')[0] == 'b'
1980 format_type = (mobj.group('type') or [None])[0]
1981 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1982 format_modified = mobj.group('mod') is not None
1983
1984 format_fallback = not format_type and not format_modified # for b, w
1985 _filter_f = (
1986 (lambda f: f.get('%scodec' % format_type) != 'none')
1987 if format_type and format_modified # bv*, ba*, wv*, wa*
1988 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1989 if format_type # bv, ba, wv, wa
1990 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1991 if not format_modified # b, w
1992 else lambda f: True) # b*, w*
1993 filter_f = lambda f: _filter_f(f) and (
1994 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
1995 else:
1996 if format_spec in self._format_selection_exts['audio']:
1997 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
1998 elif format_spec in self._format_selection_exts['video']:
1999 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2000 elif format_spec in self._format_selection_exts['storyboards']:
2001 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2002 else:
2003 filter_f = lambda f: f.get('format_id') == format_spec # id
2004
2005 def selector_function(ctx):
2006 formats = list(ctx['formats'])
2007 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2008 if format_fallback and ctx['incomplete_formats'] and not matches:
2009 # for extractors with incomplete formats (audio-only (soundcloud)
2010 # or video-only (imgur)), best/worst will fall back to the
2011 # best/worst {video,audio}-only format
2012 matches = formats
2013 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2014 try:
2015 yield matches[format_idx - 1]
2016 except IndexError:
2017 return
2018
2019 filters = [self._build_format_filter(f) for f in selector.filters]
2020
2021 def final_selector(ctx):
2022 ctx_copy = copy.deepcopy(ctx)
2023 for _filter in filters:
2024 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2025 return selector_function(ctx_copy)
2026 return final_selector
2027
2028 stream = io.BytesIO(format_spec.encode('utf-8'))
2029 try:
2030 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2031 except tokenize.TokenError:
2032 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2033
2034 class TokenIterator(object):
2035 def __init__(self, tokens):
2036 self.tokens = tokens
2037 self.counter = 0
2038
2039 def __iter__(self):
2040 return self
2041
2042 def __next__(self):
2043 if self.counter >= len(self.tokens):
2044 raise StopIteration()
2045 value = self.tokens[self.counter]
2046 self.counter += 1
2047 return value
2048
2049 next = __next__
2050
2051 def restore_last_token(self):
2052 self.counter -= 1
2053
2054 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2055 return _build_selector_function(parsed_selector)
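# Illustrative usage (editorial, not part of the upstream source; 'ydl'
# and 'formats' are hypothetical): the returned selector is a generator
# function over a context dict:
#   selector = ydl.build_format_selector('best[height<=480]')
#   chosen = list(selector({'formats': formats, 'incomplete_formats': False}))
# 'chosen' then holds at most one format matching the spec.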
2056
2057 def _calc_headers(self, info_dict):
2058 res = std_headers.copy()
2059
2060 add_headers = info_dict.get('http_headers')
2061 if add_headers:
2062 res.update(add_headers)
2063
2064 cookies = self._calc_cookies(info_dict)
2065 if cookies:
2066 res['Cookie'] = cookies
2067
2068 if 'X-Forwarded-For' not in res:
2069 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2070 if x_forwarded_for_ip:
2071 res['X-Forwarded-For'] = x_forwarded_for_ip
2072
2073 return res
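# Illustrative example (editorial, not part of the upstream source; the
# info dict is hypothetical): for
#   {'url': 'https://example.com/v.mp4',
#    'http_headers': {'Referer': 'https://example.com/'},
#    '__x_forwarded_for_ip': '1.2.3.4'}
# _calc_headers returns std_headers overlaid with that Referer, any
# cookies the cookiejar holds for the URL and 'X-Forwarded-For: 1.2.3.4'.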
2074
2075 def _calc_cookies(self, info_dict):
2076 pr = sanitized_Request(info_dict['url'])
2077 self.cookiejar.add_cookie_header(pr)
2078 return pr.get_header('Cookie')
2079
2080 def _sanitize_thumbnails(self, info_dict):
2081 thumbnails = info_dict.get('thumbnails')
2082 if thumbnails is None:
2083 thumbnail = info_dict.get('thumbnail')
2084 if thumbnail:
2085 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2086 if thumbnails:
2087 thumbnails.sort(key=lambda t: (
2088 t.get('preference') if t.get('preference') is not None else -1,
2089 t.get('width') if t.get('width') is not None else -1,
2090 t.get('height') if t.get('height') is not None else -1,
2091 t.get('id') if t.get('id') is not None else '',
2092 t.get('url')))
2093
2094 def thumbnail_tester():
2095 def test_thumbnail(t):
2096 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2097 try:
2098 self.urlopen(HEADRequest(t['url']))
2099 except network_exceptions as err:
2100 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2101 return False
2102 return True
2103 return test_thumbnail
2104
2105 for i, t in enumerate(thumbnails):
2106 if t.get('id') is None:
2107 t['id'] = '%d' % i
2108 if t.get('width') and t.get('height'):
2109 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2110 t['url'] = sanitize_url(t['url'])
2111
2112 if self.params.get('check_formats'):
2113 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2114 else:
2115 info_dict['thumbnails'] = thumbnails
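# Illustrative note (editorial, not part of the upstream source): the sort
# above is ascending, so the most preferred candidate ends up last, e.g.
#   [{'url': 'a', 'preference': -1, 'width': 120},
#    {'url': 'b', 'preference': 0, 'width': 1280}]
# keeps this order, and 'b' is what process_video_result later picks as
# info_dict['thumbnail'] via thumbnails[-1]['url'].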
2116
2117 def process_video_result(self, info_dict, download=True):
2118 assert info_dict.get('_type', 'video') == 'video'
2119
2120 if 'id' not in info_dict:
2121 raise ExtractorError('Missing "id" field in extractor result')
2122 if 'title' not in info_dict:
2123 raise ExtractorError('Missing "title" field in extractor result',
2124 video_id=info_dict['id'], ie=info_dict['extractor'])
2125
2126 def report_force_conversion(field, field_not, conversion):
2127 self.report_warning(
2128 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2129 % (field, field_not, conversion))
2130
2131 def sanitize_string_field(info, string_field):
2132 field = info.get(string_field)
2133 if field is None or isinstance(field, compat_str):
2134 return
2135 report_force_conversion(string_field, 'a string', 'string')
2136 info[string_field] = compat_str(field)
2137
2138 def sanitize_numeric_fields(info):
2139 for numeric_field in self._NUMERIC_FIELDS:
2140 field = info.get(numeric_field)
2141 if field is None or isinstance(field, compat_numeric_types):
2142 continue
2143 report_force_conversion(numeric_field, 'numeric', 'int')
2144 info[numeric_field] = int_or_none(field)
2145
2146 sanitize_string_field(info_dict, 'id')
2147 sanitize_numeric_fields(info_dict)
2148
2149 if 'playlist' not in info_dict:
2150 # It isn't part of a playlist
2151 info_dict['playlist'] = None
2152 info_dict['playlist_index'] = None
2153
2154 self._sanitize_thumbnails(info_dict)
2155
2156 thumbnail = info_dict.get('thumbnail')
2157 thumbnails = info_dict.get('thumbnails')
2158 if thumbnail:
2159 info_dict['thumbnail'] = sanitize_url(thumbnail)
2160 elif thumbnails:
2161 info_dict['thumbnail'] = thumbnails[-1]['url']
2162
2163 if info_dict.get('display_id') is None and 'id' in info_dict:
2164 info_dict['display_id'] = info_dict['id']
2165
2166 if info_dict.get('duration') is not None:
2167 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2168
2169 for ts_key, date_key in (
2170 ('timestamp', 'upload_date'),
2171 ('release_timestamp', 'release_date'),
2172 ):
2173 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2174 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2175 # see http://bugs.python.org/issue1646728)
2176 try:
2177 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2178 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2179 except (ValueError, OverflowError, OSError):
2180 pass
2181
2182 live_keys = ('is_live', 'was_live')
2183 live_status = info_dict.get('live_status')
2184 if live_status is None:
2185 for key in live_keys:
2186 if info_dict.get(key) is False:
2187 continue
2188 if info_dict.get(key):
2189 live_status = key
2190 break
2191 if all(info_dict.get(key) is False for key in live_keys):
2192 live_status = 'not_live'
2193 if live_status:
2194 info_dict['live_status'] = live_status
2195 for key in live_keys:
2196 if info_dict.get(key) is None:
2197 info_dict[key] = (live_status == key)
2198
2199 # Auto generate title fields corresponding to the *_number fields when missing
2200 # in order to always have clean titles. This is very common for TV series.
2201 for field in ('chapter', 'season', 'episode'):
2202 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2203 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
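# Illustrative example (editorial, not part of the upstream source): an
# entry with {'episode_number': 3} and no 'episode' field gains
# 'episode': 'Episode 3'; 'season_number' and 'chapter_number' likewise
# produce 'Season N' and 'Chapter N'.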
2204
2205 for cc_kind in ('subtitles', 'automatic_captions'):
2206 cc = info_dict.get(cc_kind)
2207 if cc:
2208 for _, subtitle in cc.items():
2209 for subtitle_format in subtitle:
2210 if subtitle_format.get('url'):
2211 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2212 if subtitle_format.get('ext') is None:
2213 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2214
2215 automatic_captions = info_dict.get('automatic_captions')
2216 subtitles = info_dict.get('subtitles')
2217
2218 info_dict['requested_subtitles'] = self.process_subtitles(
2219 info_dict['id'], subtitles, automatic_captions)
2220
2221 # We now pick which formats have to be downloaded
2222 if info_dict.get('formats') is None:
2223 # There's only one format available
2224 formats = [info_dict]
2225 else:
2226 formats = info_dict['formats']
2227
2228 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2229 if not self.params.get('allow_unplayable_formats'):
2230 formats = [f for f in formats if not f.get('has_drm')]
2231
2232 if not formats:
2233 self.raise_no_formats(info_dict)
2234
2235 def is_wellformed(f):
2236 url = f.get('url')
2237 if not url:
2238 self.report_warning(
2239 '"url" field is missing or empty - skipping format, '
2240 'there is an error in extractor')
2241 return False
2242 if isinstance(url, bytes):
2243 sanitize_string_field(f, 'url')
2244 return True
2245
2246 # Filter out malformed formats for better extraction robustness
2247 formats = list(filter(is_wellformed, formats))
2248
2249 formats_dict = {}
2250
2251 # We check that all the formats have the format and format_id fields
2252 for i, format in enumerate(formats):
2253 sanitize_string_field(format, 'format_id')
2254 sanitize_numeric_fields(format)
2255 format['url'] = sanitize_url(format['url'])
2256 if not format.get('format_id'):
2257 format['format_id'] = compat_str(i)
2258 else:
2259 # Sanitize format_id from characters used in format selector expression
2260 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2261 format_id = format['format_id']
2262 if format_id not in formats_dict:
2263 formats_dict[format_id] = []
2264 formats_dict[format_id].append(format)
2265
2266 # Make sure all formats have unique format_id
2267 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2268 for format_id, ambiguous_formats in formats_dict.items():
2269 ambiguous_id = len(ambiguous_formats) > 1
2270 for i, format in enumerate(ambiguous_formats):
2271 if ambiguous_id:
2272 format['format_id'] = '%s-%d' % (format_id, i)
2273 if format.get('ext') is None:
2274 format['ext'] = determine_ext(format['url']).lower()
2275 # Ensure there is no conflict between id and ext in format selection
2276 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2277 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2278 format['format_id'] = 'f%s' % format['format_id']
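# Illustrative example (editorial, not part of the upstream source): two
# formats that both report format_id '22' become '22-0' and '22-1', and a
# format_id colliding with a selectable extension (say 'mp4') is renamed
# to 'fmp4' so that the bare selector 'mp4' keeps meaning "by extension".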
2279
2280 for i, format in enumerate(formats):
2281 if format.get('format') is None:
2282 format['format'] = '{id} - {res}{note}'.format(
2283 id=format['format_id'],
2284 res=self.format_resolution(format),
2285 note=format_field(format, 'format_note', ' (%s)'),
2286 )
2287 if format.get('protocol') is None:
2288 format['protocol'] = determine_protocol(format)
2289 if format.get('resolution') is None:
2290 format['resolution'] = self.format_resolution(format, default=None)
2291 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2292 format['dynamic_range'] = 'SDR'
2293 # Add HTTP headers, so that external programs can use them from the
2294 # json output
2295 full_format_info = info_dict.copy()
2296 full_format_info.update(format)
2297 format['http_headers'] = self._calc_headers(full_format_info)
2298 # Remove private housekeeping stuff
2299 if '__x_forwarded_for_ip' in info_dict:
2300 del info_dict['__x_forwarded_for_ip']
2301
2302 # TODO Central sorting goes here
2303
2304 if not formats or formats[0] is not info_dict:
2305 # only set the 'formats' field if the original info_dict lists them;
2306 # otherwise we end up with a circular reference, since the first (and only)
2307 # element in the 'formats' field in info_dict would be info_dict itself,
2308 # which can't be exported to json
2309 info_dict['formats'] = formats
2310
2311 info_dict, _ = self.pre_process(info_dict)
2312
2313 if self.params.get('list_thumbnails'):
2314 self.list_thumbnails(info_dict)
2315 if self.params.get('listformats'):
2316 if not info_dict.get('formats') and not info_dict.get('url'):
2317 self.to_screen('%s has no formats' % info_dict['id'])
2318 else:
2319 self.list_formats(info_dict)
2320 if self.params.get('listsubtitles'):
2321 if 'automatic_captions' in info_dict:
2322 self.list_subtitles(
2323 info_dict['id'], automatic_captions, 'automatic captions')
2324 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2325 list_only = self.params.get('simulate') is None and (
2326 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2327 if list_only:
2328 # Without this printing, -F --print-json will not work
2329 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2330 return
2331
2332 format_selector = self.format_selector
2333 if format_selector is None:
2334 req_format = self._default_format_spec(info_dict, download=download)
2335 self.write_debug('Default format spec: %s' % req_format)
2336 format_selector = self.build_format_selector(req_format)
2337
2338 # During format selection we may need access to the original
2339 # format set in order to calculate some metrics or do some processing.
2340 # For now we need to be able to guess whether original formats provided
2341 # by extractor are incomplete or not (i.e. whether extractor provides only
2342 # video-only or audio-only formats) for proper formats selection for
2343 # extractors with such incomplete formats (see
2344 # https://github.com/ytdl-org/youtube-dl/pull/5556).
2345 # Since formats may be filtered during format selection and may not match
2346 # the original formats the results may be incorrect. Thus original formats
2347 # or pre-calculated metrics should be passed to format selection routines
2348 # as well.
2349 # We will pass a context object containing all necessary additional data
2350 # instead of just formats.
2351 # This fixes incorrect format selection issue (see
2352 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2353 incomplete_formats = (
2354 # All formats are video-only or
2355 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2356 # all formats are audio-only
2357 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2358
2359 ctx = {
2360 'formats': formats,
2361 'incomplete_formats': incomplete_formats,
2362 }
2363
2364 formats_to_download = list(format_selector(ctx))
2365 if not formats_to_download:
2366 if not self.params.get('ignore_no_formats_error'):
2367 raise ExtractorError('Requested format is not available', expected=True,
2368 video_id=info_dict['id'], ie=info_dict['extractor'])
2369 else:
2370 self.report_warning('Requested format is not available')
2371 # Process what we can, even without any available formats.
2372 self.process_info(dict(info_dict))
2373 elif download:
2374 self.to_screen(
2375 '[info] %s: Downloading %d format(s): %s' % (
2376 info_dict['id'], len(formats_to_download),
2377 ", ".join([f['format_id'] for f in formats_to_download])))
2378 for fmt in formats_to_download:
2379 new_info = dict(info_dict)
2380 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2381 new_info['__original_infodict'] = info_dict
2382 new_info.update(fmt)
2383 self.process_info(new_info)
2384 # We update the info dict with the best quality format (backwards compatibility)
2385 if formats_to_download:
2386 info_dict.update(formats_to_download[-1])
2387 return info_dict
2388
2389 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2390 """Select the requested subtitles and their format"""
2391 available_subs = {}
2392 if normal_subtitles and self.params.get('writesubtitles'):
2393 available_subs.update(normal_subtitles)
2394 if automatic_captions and self.params.get('writeautomaticsub'):
2395 for lang, cap_info in automatic_captions.items():
2396 if lang not in available_subs:
2397 available_subs[lang] = cap_info
2398
2399 if ((not self.params.get('writesubtitles')
2400 and not self.params.get('writeautomaticsub'))
2401 or not available_subs):
2402 return None
2403
2404 all_sub_langs = available_subs.keys()
2405 if self.params.get('allsubtitles', False):
2406 requested_langs = all_sub_langs
2407 elif self.params.get('subtitleslangs', False):
2408 # A list is used so that the order of languages will be the same as
2409 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2410 requested_langs = []
2411 for lang_re in self.params.get('subtitleslangs'):
2412 if lang_re == 'all':
2413 requested_langs.extend(all_sub_langs)
2414 continue
2415 discard = lang_re[0] == '-'
2416 if discard:
2417 lang_re = lang_re[1:]
2418 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2419 if discard:
2420 for lang in current_langs:
2421 while lang in requested_langs:
2422 requested_langs.remove(lang)
2423 else:
2424 requested_langs.extend(current_langs)
2425 requested_langs = orderedSet(requested_langs)
2426 elif 'en' in available_subs:
2427 requested_langs = ['en']
2428 else:
2429 requested_langs = [list(all_sub_langs)[0]]
2430 if requested_langs:
2431 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2432
2433 formats_query = self.params.get('subtitlesformat', 'best')
2434 formats_preference = formats_query.split('/') if formats_query else []
2435 subs = {}
2436 for lang in requested_langs:
2437 formats = available_subs.get(lang)
2438 if formats is None:
2439 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2440 continue
2441 for ext in formats_preference:
2442 if ext == 'best':
2443 f = formats[-1]
2444 break
2445 matches = list(filter(lambda f: f['ext'] == ext, formats))
2446 if matches:
2447 f = matches[-1]
2448 break
2449 else:
2450 f = formats[-1]
2451 self.report_warning(
2452 'No subtitle format found matching "%s" for language %s, '
2453 'using %s' % (formats_query, lang, f['ext']))
2454 subs[lang] = f
2455 return subs
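# Illustrative example (editorial, not part of the upstream source): with
# available languages ['en', 'en-US', 'de'] and
# subtitleslangs=['en.*', '-en-US'], the first pattern selects
# ['en', 'en-US'] and the '-'-prefixed one then discards 'en-US', leaving
# ['en']; subtitlesformat='srt/best' would pick each language's srt
# variant when present, else its last-listed (best) format.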
2456
2457 def __forced_printings(self, info_dict, filename, incomplete):
2458 def print_mandatory(field, actual_field=None):
2459 if actual_field is None:
2460 actual_field = field
2461 if (self.params.get('force%s' % field, False)
2462 and (not incomplete or info_dict.get(actual_field) is not None)):
2463 self.to_stdout(info_dict[actual_field])
2464
2465 def print_optional(field):
2466 if (self.params.get('force%s' % field, False)
2467 and info_dict.get(field) is not None):
2468 self.to_stdout(info_dict[field])
2469
2470 info_dict = info_dict.copy()
2471 if filename is not None:
2472 info_dict['filename'] = filename
2473 if info_dict.get('requested_formats') is not None:
2474 # For RTMP URLs, also include the playpath
2475 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2476 elif 'url' in info_dict:
2477 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2478
2479 if self.params.get('forceprint') or self.params.get('forcejson'):
2480 self.post_extract(info_dict)
2481 for tmpl in self.params.get('forceprint', []):
2482 mobj = re.match(r'\w+(=?)$', tmpl)
2483 if mobj and mobj.group(1):
2484 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2485 elif mobj:
2486 tmpl = '%({})s'.format(tmpl)
2487 self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2488
2489 print_mandatory('title')
2490 print_mandatory('id')
2491 print_mandatory('url', 'urls')
2492 print_optional('thumbnail')
2493 print_optional('description')
2494 print_optional('filename')
2495 if self.params.get('forceduration') and info_dict.get('duration') is not None:
2496 self.to_stdout(formatSeconds(info_dict['duration']))
2497 print_mandatory('format')
2498
2499 if self.params.get('forcejson'):
2500 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2501
2502 def dl(self, name, info, subtitle=False, test=False):
2503 if not info.get('url'):
2504 self.raise_no_formats(info, True)
2505
2506 if test:
2507 verbose = self.params.get('verbose')
2508 params = {
2509 'test': True,
2510 'quiet': self.params.get('quiet') or not verbose,
2511 'verbose': verbose,
2512 'noprogress': not verbose,
2513 'nopart': True,
2514 'skip_unavailable_fragments': False,
2515 'keep_fragments': False,
2516 'overwrites': True,
2517 '_no_ytdl_file': True,
2518 }
2519 else:
2520 params = self.params
2521 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2522 if not test:
2523 for ph in self._progress_hooks:
2524 fd.add_progress_hook(ph)
2525 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2526 self.write_debug('Invoking downloader on "%s"' % urls)
2527
2528 new_info = copy.deepcopy(self._copy_infodict(info))
2529 if new_info.get('http_headers') is None:
2530 new_info['http_headers'] = self._calc_headers(new_info)
2531 return fd.download(name, new_info, subtitle)
2532
2533 def process_info(self, info_dict):
2534 """Process a single resolved IE result."""
2535
2536 assert info_dict.get('_type', 'video') == 'video'
2537
2538 max_downloads = self.params.get('max_downloads')
2539 if max_downloads is not None:
2540 if self._num_downloads >= int(max_downloads):
2541 raise MaxDownloadsReached()
2542
2543 # TODO: backward compatibility, to be removed
2544 info_dict['fulltitle'] = info_dict['title']
2545
2546 if 'format' not in info_dict and 'ext' in info_dict:
2547 info_dict['format'] = info_dict['ext']
2548
2549 if self._match_entry(info_dict) is not None:
2550 return
2551
2552 self.post_extract(info_dict)
2553 self._num_downloads += 1
2554
2555 # info_dict['_filename'] needs to be set for backward compatibility
2556 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2557 temp_filename = self.prepare_filename(info_dict, 'temp')
2558 files_to_move = {}
2559
2560 # Forced printings
2561 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2562
2563 if self.params.get('simulate'):
2564 if self.params.get('force_write_download_archive', False):
2565 self.record_download_archive(info_dict)
2566 # Do nothing else if in simulate mode
2567 return
2568
2569 if full_filename is None:
2570 return
2571 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2572 return
2573 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2574 return
2575
2576 if self._write_description('video', info_dict,
2577 self.prepare_filename(info_dict, 'description')) is None:
2578 return
2579
2580 sub_files = self._write_subtitles(info_dict, temp_filename)
2581 if sub_files is None:
2582 return
2583 files_to_move.update(dict(sub_files))
2584
2585 thumb_files = self._write_thumbnails(
2586 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2587 if thumb_files is None:
2588 return
2589 files_to_move.update(dict(thumb_files))
2590
2591 infofn = self.prepare_filename(info_dict, 'infojson')
2592 _infojson_written = self._write_info_json('video', info_dict, infofn)
2593 if _infojson_written:
2594 info_dict['__infojson_filename'] = infofn
2595 elif _infojson_written is None:
2596 return
2597
2598 # Note: Annotations are deprecated
2599 annofn = None
2600 if self.params.get('writeannotations', False):
2601 annofn = self.prepare_filename(info_dict, 'annotation')
2602 if annofn:
2603 if not self._ensure_dir_exists(encodeFilename(annofn)):
2604 return
2605 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2606 self.to_screen('[info] Video annotations are already present')
2607 elif not info_dict.get('annotations'):
2608 self.report_warning('There are no annotations to write.')
2609 else:
2610 try:
2611 self.to_screen('[info] Writing video annotations to: ' + annofn)
2612 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2613 annofile.write(info_dict['annotations'])
2614 except (KeyError, TypeError):
2615 self.report_warning('There are no annotations to write.')
2616 except (OSError, IOError):
2617 self.report_error('Cannot write annotations file: ' + annofn)
2618 return
2619
2620 # Write internet shortcut files
2621 url_link = webloc_link = desktop_link = False
2622 if self.params.get('writelink', False):
2623 if sys.platform == "darwin": # macOS.
2624 webloc_link = True
2625 elif sys.platform.startswith("linux"):
2626 desktop_link = True
2627 else: # if sys.platform in ['win32', 'cygwin']:
2628 url_link = True
2629 if self.params.get('writeurllink', False):
2630 url_link = True
2631 if self.params.get('writewebloclink', False):
2632 webloc_link = True
2633 if self.params.get('writedesktoplink', False):
2634 desktop_link = True
2635
2636 if url_link or webloc_link or desktop_link:
2637 if 'webpage_url' not in info_dict:
2638 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2639 return
2640 ascii_url = iri_to_uri(info_dict['webpage_url'])
2641
2642 def _write_link_file(extension, template, newline, embed_filename):
2643 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2644 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2645 self.to_screen('[info] Internet shortcut is already present')
2646 else:
2647 try:
2648 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2649 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2650 template_vars = {'url': ascii_url}
2651 if embed_filename:
2652 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2653 linkfile.write(template % template_vars)
2654 except (OSError, IOError):
2655 self.report_error('Cannot write internet shortcut ' + linkfn)
2656 return False
2657 return True
2658
2659 if url_link:
2660 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2661 return
2662 if webloc_link:
2663 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2664 return
2665 if desktop_link:
2666 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2667 return
2668
2669 try:
2670 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2671 except PostProcessingError as err:
2672 self.report_error('Preprocessing: %s' % str(err))
2673 return
2674
2675 must_record_download_archive = False
2676 if self.params.get('skip_download', False):
2677 info_dict['filepath'] = temp_filename
2678 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2679 info_dict['__files_to_move'] = files_to_move
2680 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2681 else:
2682 # Download
2683 info_dict.setdefault('__postprocessors', [])
2684 try:
2685
2686 def existing_file(*filepaths):
2687 ext = info_dict.get('ext')
2688 final_ext = self.params.get('final_ext', ext)
2689 existing_files = []
2690 for file in orderedSet(filepaths):
2691 if final_ext != ext:
2692 converted = replace_extension(file, final_ext, ext)
2693 if os.path.exists(encodeFilename(converted)):
2694 existing_files.append(converted)
2695 if os.path.exists(encodeFilename(file)):
2696 existing_files.append(file)
2697
2698 if not existing_files or self.params.get('overwrites', False):
2699 for file in orderedSet(existing_files):
2700 self.report_file_delete(file)
2701 os.remove(encodeFilename(file))
2702 return None
2703
2704 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2705 return existing_files[0]
2706
2707 success = True
2708 if info_dict.get('requested_formats') is not None:
2709
2710 def compatible_formats(formats):
2711 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2712 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2713 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2714 if len(video_formats) > 2 or len(audio_formats) > 2:
2715 return False
2716
2717 # Check extension
2718 exts = set(format.get('ext') for format in formats)
2719 COMPATIBLE_EXTS = (
2720 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2721 set(('webm',)),
2722 )
2723 for ext_sets in COMPATIBLE_EXTS:
2724 if ext_sets.issuperset(exts):
2725 return True
2726 # TODO: Check acodec/vcodec
2727 return False
2728
2729 requested_formats = info_dict['requested_formats']
2730 old_ext = info_dict['ext']
2731 if self.params.get('merge_output_format') is None:
2732 if not compatible_formats(requested_formats):
2733 info_dict['ext'] = 'mkv'
2734 self.report_warning(
2735 'Requested formats are incompatible for merge and will be merged into mkv')
2736 if (info_dict['ext'] == 'webm'
2737 and info_dict.get('thumbnails')
2738 # check with type instead of pp_key, __name__, or isinstance
2739 # since we don't want any custom PPs to trigger this
2740 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2741 info_dict['ext'] = 'mkv'
2742 self.report_warning(
2743 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2744 new_ext = info_dict['ext']
2745
2746 def correct_ext(filename, ext=new_ext):
2747 if filename == '-':
2748 return filename
2749 filename_real_ext = os.path.splitext(filename)[1][1:]
2750 filename_wo_ext = (
2751 os.path.splitext(filename)[0]
2752 if filename_real_ext in (old_ext, new_ext)
2753 else filename)
2754 return '%s.%s' % (filename_wo_ext, ext)
2755
2756 # Ensure filename always has a correct extension for successful merge
2757 full_filename = correct_ext(full_filename)
2758 temp_filename = correct_ext(temp_filename)
2759 dl_filename = existing_file(full_filename, temp_filename)
2760 info_dict['__real_download'] = False
2761
2762 if dl_filename is not None:
2763 self.report_file_already_downloaded(dl_filename)
2764 elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
2765 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2766 success, real_download = self.dl(temp_filename, info_dict)
2767 info_dict['__real_download'] = real_download
2768 else:
2769 downloaded = []
2770 merger = FFmpegMergerPP(self)
2771 if self.params.get('allow_unplayable_formats'):
2772 self.report_warning(
2773 'You have requested merging of multiple formats '
2774 'while also allowing unplayable formats to be downloaded. '
2775 'The formats won\'t be merged to prevent data corruption.')
2776 elif not merger.available:
2777 self.report_warning(
2778 'You have requested merging of multiple formats but ffmpeg is not installed. '
2779 'The formats won\'t be merged.')
2780
2781 if temp_filename == '-':
2782 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
2783 else 'but the formats are incompatible for simultaneous download' if merger.available
2784 else 'but ffmpeg is not installed')
2785 self.report_warning(
2786 f'You have requested downloading multiple formats to stdout {reason}. '
2787 'The formats will be streamed one after the other')
2788 fname = temp_filename
2789 for f in requested_formats:
2790 new_info = dict(info_dict)
2791 del new_info['requested_formats']
2792 new_info.update(f)
2793 if temp_filename != '-':
2794 fname = prepend_extension(
2795 correct_ext(temp_filename, new_info['ext']),
2796 'f%s' % f['format_id'], new_info['ext'])
2797 if not self._ensure_dir_exists(fname):
2798 return
2799 f['filepath'] = fname
2800 downloaded.append(fname)
2801 partial_success, real_download = self.dl(fname, new_info)
2802 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2803 success = success and partial_success
2804 if merger.available and not self.params.get('allow_unplayable_formats'):
2805 info_dict['__postprocessors'].append(merger)
2806 info_dict['__files_to_merge'] = downloaded
2807 # Even if there were no new downloads, the merge itself only happens now
2808 info_dict['__real_download'] = True
2809 else:
2810 for file in downloaded:
2811 files_to_move[file] = None
2812 else:
2813 # Just a single file
2814 dl_filename = existing_file(full_filename, temp_filename)
2815 if dl_filename is None or dl_filename == temp_filename:
2816 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2817 # So we should try to resume the download
2818 success, real_download = self.dl(temp_filename, info_dict)
2819 info_dict['__real_download'] = real_download
2820 else:
2821 self.report_file_already_downloaded(dl_filename)
2822
2823 dl_filename = dl_filename or temp_filename
2824 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2825
2826 except network_exceptions as err:
2827 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2828 return
2829 except (OSError, IOError) as err:
2830 raise UnavailableVideoError(err)
2831 except (ContentTooShortError, ) as err:
2832 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2833 return
2834
2835 if success and full_filename != '-':
2836
2837 def fixup():
2838 do_fixup = True
2839 fixup_policy = self.params.get('fixup')
2840 vid = info_dict['id']
2841
2842 if fixup_policy in ('ignore', 'never'):
2843 return
2844 elif fixup_policy == 'warn':
2845 do_fixup = False
2846 elif fixup_policy != 'force':
2847 assert fixup_policy in ('detect_or_warn', None)
2848 if not info_dict.get('__real_download'):
2849 do_fixup = False
2850
2851 def ffmpeg_fixup(cndn, msg, cls):
2852 if not cndn:
2853 return
2854 if not do_fixup:
2855 self.report_warning(f'{vid}: {msg}')
2856 return
2857 pp = cls(self)
2858 if pp.available:
2859 info_dict['__postprocessors'].append(pp)
2860 else:
2861 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2862
2863 stretched_ratio = info_dict.get('stretched_ratio')
2864 ffmpeg_fixup(
2865 stretched_ratio not in (1, None),
2866 f'Non-uniform pixel ratio {stretched_ratio}',
2867 FFmpegFixupStretchedPP)
2868
2869 ffmpeg_fixup(
2870 (info_dict.get('requested_formats') is None
2871 and info_dict.get('container') == 'm4a_dash'
2872 and info_dict.get('ext') == 'm4a'),
2873 'writing DASH m4a. Only some players support this container',
2874 FFmpegFixupM4aPP)
2875
2876 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
2877 downloader = downloader.__name__ if downloader else None
2878 ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
2879 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2880 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2881 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2882
2883 fixup()
2884 try:
2885 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2886 except PostProcessingError as err:
2887 self.report_error('Postprocessing: %s' % str(err))
2888 return
2889 try:
2890 for ph in self._post_hooks:
2891 ph(info_dict['filepath'])
2892 except Exception as err:
2893 self.report_error('post hooks: %s' % str(err))
2894 return
2895 must_record_download_archive = True
2896
2897 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2898 self.record_download_archive(info_dict)
2899 max_downloads = self.params.get('max_downloads')
2900 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2901 raise MaxDownloadsReached()
2902
2903 def download(self, url_list):
2904 """Download a given list of URLs."""
2905 outtmpl = self.outtmpl_dict['default']
2906 if (len(url_list) > 1
2907 and outtmpl != '-'
2908 and '%' not in outtmpl
2909 and self.params.get('max_downloads') != 1):
2910 raise SameFileError(outtmpl)
2911
2912 for url in url_list:
2913 try:
2914 # It also downloads the videos
2915 res = self.extract_info(
2916 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2917 except UnavailableVideoError:
2918 self.report_error('unable to download video')
2919 except MaxDownloadsReached:
2920 self.to_screen('[info] Maximum number of downloads reached')
2921 raise
2922 except ExistingVideoReached:
2923 self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
2924 raise
2925 except RejectedVideoReached:
2926 self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
2927 raise
2928 else:
2929 if self.params.get('dump_single_json', False):
2930 self.post_extract(res)
2931 self.to_stdout(json.dumps(self.sanitize_info(res)))
2932
2933 return self._download_retcode
2934
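# Embedding sketch (illustrative, not part of this file): download() is the
# usual entry point for API consumers; the URL and options are placeholders.
#
#     from yt_dlp import YoutubeDL
#
#     with YoutubeDL({'outtmpl': '%(title)s.%(ext)s'}) as ydl:
#         retcode = ydl.download(['https://www.example.com/watch?v=xyz'])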
2935 def download_with_info_file(self, info_filename):
2936 with contextlib.closing(fileinput.FileInput(
2937 [info_filename], mode='r',
2938 openhook=fileinput.hook_encoded('utf-8'))) as f:
2939 # FileInput doesn't have a read method, so we can't call json.load
2940 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2941 try:
2942 self.process_ie_result(info, download=True)
2943 except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
2944 webpage_url = info.get('webpage_url')
2945 if webpage_url is not None:
2946 self.report_warning('The info failed to download; trying again with "%s"' % webpage_url)
2947 return self.download([webpage_url])
2948 else:
2949 raise
2950 return self._download_retcode
2951
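# Sketch (illustrative): replaying a download from a previously written info
# JSON, which is what --load-info-json does; the filename and params are
# placeholders.
#
#     with YoutubeDL(params) as ydl:
#         ydl.download_with_info_file('example.info.json')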
2952 @staticmethod
2953 def sanitize_info(info_dict, remove_private_keys=False):
2954 ''' Sanitize the infodict for converting to json '''
2955 if info_dict is None:
2956 return info_dict
2957 info_dict.setdefault('epoch', int(time.time()))
2958 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
2959 keep_keys = {'_type'} # Always keep this to facilitate load-info-json
2960 if remove_private_keys:
2961 remove_keys |= {
2962 'requested_formats', 'requested_subtitles', 'requested_entries',
2963 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2964 }
2965 empty_values = (None, {}, [], set(), tuple())
2966 reject = lambda k, v: k not in keep_keys and (
2967 k.startswith('_') or k in remove_keys or v in empty_values)
2968 else:
2969 reject = lambda k, v: k in remove_keys
2970 filter_fn = lambda obj: (
2971 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2972 else obj if not isinstance(obj, dict)
2973 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2974 return filter_fn(info_dict)
2975
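# Sketch (illustrative): producing JSON-safe output from an extracted
# infodict, assuming `info` came from extract_info():
#
#     import json
#
#     clean = YoutubeDL.sanitize_info(info, remove_private_keys=True)
#     print(json.dumps(clean))
#
# With remove_private_keys=False (the default), only internal keys such as
# '__original_infodict' are dropped; with True, underscore-prefixed keys
# (except '_type') and empty values are removed as well.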
2976 @staticmethod
2977 def filter_requested_info(info_dict, actually_filter=True):
2978 ''' Alias of sanitize_info for backward compatibility '''
2979 return YoutubeDL.sanitize_info(info_dict, actually_filter)
2980
2981 def run_pp(self, pp, infodict):
2982 files_to_delete = []
2983 if '__files_to_move' not in infodict:
2984 infodict['__files_to_move'] = {}
2985 try:
2986 files_to_delete, infodict = pp.run(infodict)
2987 except PostProcessingError as e:
2988 # Must be True and not 'only_download'
2989 if self.params.get('ignoreerrors') is True:
2990 self.report_error(e)
2991 return infodict
2992 raise
2993
2994 if not files_to_delete:
2995 return infodict
2996 if self.params.get('keepvideo', False):
2997 for f in files_to_delete:
2998 infodict['__files_to_move'].setdefault(f, '')
2999 else:
3000 for old_filename in set(files_to_delete):
3001 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3002 try:
3003 os.remove(encodeFilename(old_filename))
3004 except (IOError, OSError):
3005 self.report_warning('Unable to remove downloaded original file')
3006 if old_filename in infodict['__files_to_move']:
3007 del infodict['__files_to_move'][old_filename]
3008 return infodict
3009
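# Behaviour sketch for the ignoreerrors check above (illustrative): with
# YoutubeDL({'ignoreerrors': True}) a failing postprocessor is reported and
# the infodict is returned unchanged; with any other truthy value, such as
# 'only_download', the PostProcessingError is re-raised.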
3010 @staticmethod
3011 def post_extract(info_dict):
3012 def actual_post_extract(info_dict):
3013 if info_dict.get('_type') in ('playlist', 'multi_video'):
3014 for video_dict in info_dict.get('entries') or []:
3015 actual_post_extract(video_dict or {})
3016 return
3017
3018 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3019 extra = post_extractor().items()
3020 info_dict.update(extra)
3021 info_dict.pop('__post_extractor', None)
3022
3023 original_infodict = info_dict.get('__original_infodict') or {}
3024 original_infodict.update(extra)
3025 original_infodict.pop('__post_extractor', None)
3026
3027 actual_post_extract(info_dict or {})
3028
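# Contract sketch (illustrative): an extractor may defer expensive fields by
# attaching a callable that returns a dict; post_extract() merges the result
# in and removes the hook. `fetch_comments` is a hypothetical helper.
#
#     info['__post_extractor'] = lambda: {'comments': fetch_comments()}
#     YoutubeDL.post_extract(info)
#     assert '__post_extractor' not in info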
3029 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3030 info = dict(ie_info)
3031 info['__files_to_move'] = files_to_move or {}
3032 for pp in self._pps[key]:
3033 info = self.run_pp(pp, info)
3034 return info, info.pop('__files_to_move', None)
3035
3036 def post_process(self, filename, ie_info, files_to_move=None):
3037 """Run all the postprocessors on the given file."""
3038 info = dict(ie_info)
3039 info['filepath'] = filename
3040 info['__files_to_move'] = files_to_move or {}
3041
3042 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3043 info = self.run_pp(pp, info)
3044 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3045 del info['__files_to_move']
3046 for pp in self._pps['after_move']:
3047 info = self.run_pp(pp, info)
3048 return info
3049
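# Pipeline order, as implemented above: PPs attached to the infodict during
# download run first, then the user's 'post_process' PPs, then
# MoveFilesAfterDownloadPP, and finally the 'after_move' PPs. A PP can be
# registered for a specific stage with e.g. (illustrative)
#
#     ydl.add_post_processor(MyPP(), when='after_move')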
3050 def _make_archive_id(self, info_dict):
3051 video_id = info_dict.get('id')
3052 if not video_id:
3053 return
3054 # Future-proof against any change in case
3055 # and for backwards compatibility with prior versions
3056 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3057 if extractor is None:
3058 url = str_or_none(info_dict.get('url'))
3059 if not url:
3060 return
3061 # Try to find matching extractor for the URL and take its ie_key
3062 for ie_key, ie in self._ies.items():
3063 if ie.suitable(url):
3064 extractor = ie_key
3065 break
3066 else:
3067 return
3068 return '%s %s' % (extractor.lower(), video_id)
3069
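# The archive key built above is '<extractor_key_lowercase> <video_id>'; a
# YouTube video (illustrative id) would therefore be recorded as the line
#
#     youtube dQw4w9WgXcQ
#
# which is exactly what record_download_archive() appends to the file named
# by params['download_archive'] and what in_download_archive() looks up.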
3070 def in_download_archive(self, info_dict):
3071 fn = self.params.get('download_archive')
3072 if fn is None:
3073 return False
3074
3075 vid_id = self._make_archive_id(info_dict)
3076 if not vid_id:
3077 return False # Incomplete video information
3078
3079 return vid_id in self.archive
3080
3081 def record_download_archive(self, info_dict):
3082 fn = self.params.get('download_archive')
3083 if fn is None:
3084 return
3085 vid_id = self._make_archive_id(info_dict)
3086 assert vid_id
3087 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3088 archive_file.write(vid_id + '\n')
3089 self.archive.add(vid_id)
3090
3091 @staticmethod
3092 def format_resolution(format, default='unknown'):
3093 is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none'
3094 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3095 return 'audio only'
3096 if format.get('resolution') is not None:
3097 return format['resolution']
3098 if format.get('width') and format.get('height'):
3099 res = '%dx%d' % (format['width'], format['height'])
3100 elif format.get('height'):
3101 res = '%sp' % format['height']
3102 elif format.get('width'):
3103 res = '%dx?' % format['width']
3104 elif is_images:
3105 return 'images'
3106 else:
3107 return default
3108 return f'{res} images' if is_images else res
3109
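# Worked examples for format_resolution() (illustrative format dicts):
#
#     {'vcodec': 'none', 'acodec': 'mp4a.40.2'}          -> 'audio only'
#     {'width': 1920, 'height': 1080}                    -> '1920x1080'
#     {'height': 720}                                    -> '720p'
#     {'vcodec': 'none', 'acodec': 'none', 'width': 320,
#      'height': 180}                                    -> '320x180 images'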
3110 def _format_note(self, fdict):
3111 res = ''
3112 if fdict.get('ext') in ['f4f', 'f4m']:
3113 res += '(unsupported) '
3114 if fdict.get('language'):
3115 if res:
3116 res += ' '
3117 res += '[%s] ' % fdict['language']
3118 if fdict.get('format_note') is not None:
3119 res += fdict['format_note'] + ' '
3120 if fdict.get('tbr') is not None:
3121 res += '%4dk ' % fdict['tbr']
3122 if fdict.get('container') is not None:
3123 if res:
3124 res += ', '
3125 res += '%s container' % fdict['container']
3126 if (fdict.get('vcodec') is not None
3127 and fdict.get('vcodec') != 'none'):
3128 if res:
3129 res += ', '
3130 res += fdict['vcodec']
3131 if fdict.get('vbr') is not None:
3132 res += '@'
3133 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3134 res += 'video@'
3135 if fdict.get('vbr') is not None:
3136 res += '%4dk' % fdict['vbr']
3137 if fdict.get('fps') is not None:
3138 if res:
3139 res += ', '
3140 res += '%sfps' % fdict['fps']
3141 if fdict.get('acodec') is not None:
3142 if res:
3143 res += ', '
3144 if fdict['acodec'] == 'none':
3145 res += 'video only'
3146 else:
3147 res += '%-5s' % fdict['acodec']
3148 elif fdict.get('abr') is not None:
3149 if res:
3150 res += ', '
3151 res += 'audio'
3152 if fdict.get('abr') is not None:
3153 res += '@%3dk' % fdict['abr']
3154 if fdict.get('asr') is not None:
3155 res += ' (%5dHz)' % fdict['asr']
3156 if fdict.get('filesize') is not None:
3157 if res:
3158 res += ', '
3159 res += format_bytes(fdict['filesize'])
3160 elif fdict.get('filesize_approx') is not None:
3161 if res:
3162 res += ', '
3163 res += '~' + format_bytes(fdict['filesize_approx'])
3164 return res
3165
3166 def list_formats(self, info_dict):
3167 formats = info_dict.get('formats', [info_dict])
3168 new_format = (
3169 'list-formats' not in self.params.get('compat_opts', [])
3170 and self.params.get('listformats_table', True) is not False)
3171 if new_format:
3172 table = [
3173 [
3174 format_field(f, 'format_id'),
3175 format_field(f, 'ext'),
3176 self.format_resolution(f),
3177 format_field(f, 'fps', '%d'),
3178 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3179 '|',
3180 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3181 format_field(f, 'tbr', '%4dk'),
3182 shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
3183 '|',
3184 format_field(f, 'vcodec', default='unknown').replace('none', ''),
3185 format_field(f, 'vbr', '%4dk'),
3186 format_field(f, 'acodec', default='unknown').replace('none', ''),
3187 format_field(f, 'abr', '%3dk'),
3188 format_field(f, 'asr', '%5dHz'),
3189 ', '.join(filter(None, (
3190 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
3191 format_field(f, 'language', '[%s]'),
3192 format_field(f, 'format_note'),
3193 format_field(f, 'container', ignore=(None, f.get('ext'))),
3194 ))),
3195 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3196 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', '|', ' FILESIZE', ' TBR', 'PROTO',
3197 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
3198 else:
3199 table = [
3200 [
3201 format_field(f, 'format_id'),
3202 format_field(f, 'ext'),
3203 self.format_resolution(f),
3204 self._format_note(f)]
3205 for f in formats
3206 if f.get('preference') is None or f['preference'] >= -1000]
3207 header_line = ['format code', 'extension', 'resolution', 'note']
3208
3209 self.to_screen(
3210 '[info] Available formats for %s:' % info_dict['id'])
3211 self.to_stdout(render_table(
3212 header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3213
3214 def list_thumbnails(self, info_dict):
3215 thumbnails = list(info_dict.get('thumbnails') or [])
3216 if not thumbnails:
3217 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3218 return
3219
3220 self.to_screen(
3221 '[info] Thumbnails for %s:' % info_dict['id'])
3222 self.to_stdout(render_table(
3223 ['ID', 'width', 'height', 'URL'],
3224 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3225
3226 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3227 if not subtitles:
3228 self.to_screen('%s has no %s' % (video_id, name))
3229 return
3230 self.to_screen(
3231 'Available %s for %s:' % (name, video_id))
3232
3233 def _row(lang, formats):
3234 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3235 if len(set(names)) == 1:
3236 names = [] if names[0] == 'unknown' else names[:1]
3237 return [lang, ', '.join(names), ', '.join(exts)]
3238
3239 self.to_stdout(render_table(
3240 ['Language', 'Name', 'Formats'],
3241 [_row(lang, formats) for lang, formats in subtitles.items()],
3242 hideEmpty=True))
3243
3244 def urlopen(self, req):
3245 """ Start an HTTP download """
3246 if isinstance(req, compat_basestring):
3247 req = sanitized_Request(req)
3248 return self._opener.open(req, timeout=self._socket_timeout)
3249
3250 def print_debug_header(self):
3251 if not self.params.get('verbose'):
3252 return
3253 get_encoding = lambda stream: getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3254 encoding_str = (
3255 '[debug] Encodings: locale %s, fs %s, stdout %s, stderr %s, pref %s\n' % (
3256 locale.getpreferredencoding(),
3257 sys.getfilesystemencoding(),
3258 get_encoding(self._screen_file), get_encoding(self._err_file),
3259 self.get_encoding()))
3260
3261 logger = self.params.get('logger')
3262 if logger:
3263 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3264 write_debug(encoding_str)
3265 else:
3266 write_debug = lambda msg: self._write_string(f'[debug] {msg}')
3267 write_string(encoding_str, encoding=None)
3268
3269 source = detect_variant()
3270 write_debug('yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
3271 if not _LAZY_LOADER:
3272 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3273 write_debug('Lazy loading extractors is forcibly disabled\n')
3274 else:
3275 write_debug('Lazy loading extractors is disabled\n')
3276 if plugin_extractors or plugin_postprocessors:
3277 write_debug('Plugins: %s\n' % [
3278 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3279 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3280 if self.params.get('compat_opts'):
3281 write_debug('Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
3282 try:
3283 sp = Popen(
3284 ['git', 'rev-parse', '--short', 'HEAD'],
3285 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3286 cwd=os.path.dirname(os.path.abspath(__file__)))
3287 out, err = sp.communicate_or_kill()
3288 out = out.decode().strip()
3289 if re.fullmatch('[0-9a-f]+', out):
3290 write_debug('Git HEAD: %s\n' % out)
3291 except Exception:
3292 # sys.exc_clear() existed only on Python 2; on Python 3 the exception
3293 # reference is cleared automatically when the except block exits, so
3294 # there is nothing to do here
3295 pass
3296
3297 def python_implementation():
3298 impl_name = platform.python_implementation()
3299 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3300 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3301 return impl_name
3302
3303 write_debug('Python version %s (%s %s) - %s\n' % (
3304 platform.python_version(),
3305 python_implementation(),
3306 platform.architecture()[0],
3307 platform_name()))
3308
3309 exe_versions = FFmpegPostProcessor.get_versions(self)
3310 exe_versions['rtmpdump'] = rtmpdump_version()
3311 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3312 exe_str = ', '.join(
3313 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3314 ) or 'none'
3315 write_debug('exe versions: %s\n' % exe_str)
3316
3317 from .downloader.websocket import has_websockets
3318 from .postprocessor.embedthumbnail import has_mutagen
3319 from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3320
3321 lib_str = ', '.join(sorted(filter(None, (
3322 compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3323 has_websockets and 'websockets',
3324 has_mutagen and 'mutagen',
3325 SQLITE_AVAILABLE and 'sqlite',
3326 KEYRING_AVAILABLE and 'keyring',
3327 )))) or 'none'
3328 write_debug('Optional libraries: %s\n' % lib_str)
3329 write_debug('ANSI escape support: stdout = %s, stderr = %s\n' % (
3330 supports_terminal_sequences(self._screen_file),
3331 supports_terminal_sequences(self._err_file)))
3332
3333 proxy_map = {}
3334 for handler in self._opener.handlers:
3335 if hasattr(handler, 'proxies'):
3336 proxy_map.update(handler.proxies)
3337 write_debug('Proxy map: ' + compat_str(proxy_map) + '\n')
3338
3339 if self.params.get('call_home', False):
3340 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3341 write_debug('Public IP address: %s\n' % ipaddr)
3342 return
3343 latest_version = self.urlopen(
3344 'https://yt-dl.org/latest/version').read().decode('utf-8')
3345 if version_tuple(latest_version) > version_tuple(__version__):
3346 self.report_warning(
3347 'You are using an outdated version (newest version: %s)! '
3348 'See https://yt-dl.org/update if you need help updating.' %
3349 latest_version)
3350
3351 def _setup_opener(self):
3352 timeout_val = self.params.get('socket_timeout')
3353 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3354
3355 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3356 opts_cookiefile = self.params.get('cookiefile')
3357 opts_proxy = self.params.get('proxy')
3358
3359 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3360
3361 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3362 if opts_proxy is not None:
3363 if opts_proxy == '':
3364 proxies = {}
3365 else:
3366 proxies = {'http': opts_proxy, 'https': opts_proxy}
3367 else:
3368 proxies = compat_urllib_request.getproxies()
3369 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3370 if 'http' in proxies and 'https' not in proxies:
3371 proxies['https'] = proxies['http']
3372 proxy_handler = PerRequestProxyHandler(proxies)
3373
3374 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3375 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3376 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3377 redirect_handler = YoutubeDLRedirectHandler()
3378 data_handler = compat_urllib_request_DataHandler()
3379
3380 # When passing our own FileHandler instance, build_opener won't add the
3381 # default FileHandler, which allows us to disable the file protocol; it
3382 # could otherwise be abused for malicious purposes (see
3383 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3384 file_handler = compat_urllib_request.FileHandler()
3385
3386 def file_open(*args, **kwargs):
3387 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3388 file_handler.file_open = file_open
3389
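# As a result (illustrative), any attempt such as
#
#     ydl.urlopen('file:///etc/passwd')
#
# now raises URLError instead of reading a local file.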
3390 opener = compat_urllib_request.build_opener(
3391 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3392
3393 # Delete the default user-agent header, which would otherwise apply in
3394 # cases where our custom HTTP handler doesn't come into play
3395 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3396 opener.addheaders = []
3397 self._opener = opener
3398
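# Sketch (illustrative): routing all requests through a proxy from the API;
# the address is a placeholder.
#
#     ydl = YoutubeDL({'proxy': 'socks5://127.0.0.1:1080'})
#
# Passing an empty string disables even the environment proxies, per the
# opts_proxy == '' branch above.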
3399 def encode(self, s):
3400 if isinstance(s, bytes):
3401 return s # Already encoded
3402
3403 try:
3404 return s.encode(self.get_encoding())
3405 except UnicodeEncodeError as err:
3406 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3407 raise
3408
3409 def get_encoding(self):
3410 encoding = self.params.get('encoding')
3411 if encoding is None:
3412 encoding = preferredencoding()
3413 return encoding
3414
3415 def _write_info_json(self, label, ie_result, infofn):
3416 ''' Write infojson and return True = written, False = skipped, None = error '''
3417 if not self.params.get('writeinfojson'):
3418 return False
3419 elif not infofn:
3420 self.write_debug(f'Skipping writing {label} infojson')
3421 return False
3422 elif not self._ensure_dir_exists(infofn):
3423 return None
3424 elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3425 self.to_screen(f'[info] {label.title()} metadata is already present')
3426 else:
3427 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3428 try:
3429 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3430 except (OSError, IOError):
3431 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3432 return None
3433 return True
3434
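# Caller-side sketch for the tri-state return above (illustrative):
#
#     ret = self._write_info_json('video', info_dict, infofn)
#     if ret is None:    # hard error, already reported
#         return
#     elif ret:          # written, or already present on disk
#         ...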
3435 def _write_description(self, label, ie_result, descfn):
3436 ''' Write description and return True = written, False = skipped, None = error '''
3437 if not self.params.get('writedescription'):
3438 return False
3439 elif not descfn:
3440 self.write_debug(f'Skipping writing {label} description')
3441 return False
3442 elif not self._ensure_dir_exists(descfn):
3443 return None
3444 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3445 self.to_screen(f'[info] {label.title()} description is already present')
3446 elif ie_result.get('description') is None:
3447 self.report_warning(f'There\'s no {label} description to write')
3448 return False
3449 else:
3450 try:
3451 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3452 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3453 descfile.write(ie_result['description'])
3454 except (OSError, IOError):
3455 self.report_error(f'Cannot write {label} description file {descfn}')
3456 return None
3457 return True
3458
3459 def _write_subtitles(self, info_dict, filename):
3460 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error '''
3461 ret = []
3462 subtitles = info_dict.get('requested_subtitles')
3463 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3464 # Subtitle download errors are already handled as non-fatal by the relevant IE,
3465 # so processing silently continues for IEs without subtitle support
3466 return ret
3467
3468 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3469 if not sub_filename_base:
3470 self.to_screen('[info] Skipping writing video subtitles')
3471 return ret
3472 for sub_lang, sub_info in subtitles.items():
3473 sub_format = sub_info['ext']
3474 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3475 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3476 if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3477 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3478 sub_info['filepath'] = sub_filename
3479 ret.append((sub_filename, sub_filename_final))
3480 continue
3481
3482 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3483 if sub_info.get('data') is not None:
3484 try:
3485 # Use newline='' to prevent conversion of newline characters
3486 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3487 with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3488 subfile.write(sub_info['data'])
3489 sub_info['filepath'] = sub_filename
3490 ret.append((sub_filename, sub_filename_final))
3491 continue
3492 except (OSError, IOError):
3493 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3494 return None
3495
3496 try:
3497 sub_copy = sub_info.copy()
3498 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3499 self.dl(sub_filename, sub_copy, subtitle=True)
3500 sub_info['filepath'] = sub_filename
3501 ret.append((sub_filename, sub_filename_final))
3502 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3503 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3504 continue
3505 return ret
3506
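# Shape consumed above (illustrative): info_dict['requested_subtitles'] maps
# a language code to a dict carrying 'ext' plus either inline 'data' or a
# downloadable 'url', e.g.
#
#     {'en': {'ext': 'vtt', 'url': 'https://example.com/subs.en.vtt'}}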
3507 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3508 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3509 write_all = self.params.get('write_all_thumbnails', False)
3510 thumbnails, ret = [], []
3511 if write_all or self.params.get('writethumbnail', False):
3512 thumbnails = info_dict.get('thumbnails') or []
3513 multiple = write_all and len(thumbnails) > 1
3514
3515 if thumb_filename_base is None:
3516 thumb_filename_base = filename
3517 if thumbnails and not thumb_filename_base:
3518 self.write_debug(f'Skipping writing {label} thumbnail')
3519 return ret
3520
3521 for t in thumbnails[::-1]:
3522 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3523 thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
3524 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3525 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3526
3527 if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3528 ret.append((thumb_filename, thumb_filename_final))
3529 t['filepath'] = thumb_filename
3530 self.to_screen(f'[info] {thumb_display_id.title()} is already present')
3531 else:
3532 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3533 try:
3534 uf = self.urlopen(t['url'])
3535 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3536 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3537 shutil.copyfileobj(uf, thumbf)
3538 ret.append((thumb_filename, thumb_filename_final))
3539 t['filepath'] = thumb_filename
3540 except network_exceptions as err:
3541 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3542 if ret and not write_all:
3543 break
3544 return ret
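# Shape consumed above (illustrative): each entry of info_dict['thumbnails']
# needs a 'url', and an 'id' when multiple thumbnails are written, e.g.
#
#     [{'id': '0', 'url': 'https://example.com/thumb.jpg'}]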