]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
[cleanup] misc
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import functools
13 import io
14 import itertools
15 import json
16 import locale
17 import operator
18 import os
19 import platform
20 import re
21 import shutil
22 import subprocess
23 import sys
24 import tempfile
25 import time
26 import tokenize
27 import traceback
28 import random
29 import unicodedata
30
31 from string import ascii_letters
32
33 from .compat import (
34 compat_basestring,
35 compat_get_terminal_size,
36 compat_kwargs,
37 compat_numeric_types,
38 compat_os_name,
39 compat_pycrypto_AES,
40 compat_shlex_quote,
41 compat_str,
42 compat_tokenize_tokenize,
43 compat_urllib_error,
44 compat_urllib_request,
45 compat_urllib_request_DataHandler,
46 windows_enable_vt_mode,
47 )
48 from .cookies import load_cookies
49 from .utils import (
50 age_restricted,
51 args_to_str,
52 ContentTooShortError,
53 date_from_str,
54 DateRange,
55 DEFAULT_OUTTMPL,
56 determine_ext,
57 determine_protocol,
58 DOT_DESKTOP_LINK_TEMPLATE,
59 DOT_URL_LINK_TEMPLATE,
60 DOT_WEBLOC_LINK_TEMPLATE,
61 DownloadError,
62 encode_compat_str,
63 encodeFilename,
64 EntryNotInPlaylist,
65 error_to_compat_str,
66 ExistingVideoReached,
67 expand_path,
68 ExtractorError,
69 float_or_none,
70 format_bytes,
71 format_field,
72 formatSeconds,
73 GeoRestrictedError,
74 HEADRequest,
75 int_or_none,
76 iri_to_uri,
77 ISO3166Utils,
78 LazyList,
79 locked_file,
80 make_dir,
81 make_HTTPS_handler,
82 MaxDownloadsReached,
83 network_exceptions,
84 orderedSet,
85 OUTTMPL_TYPES,
86 PagedList,
87 parse_filesize,
88 PerRequestProxyHandler,
89 platform_name,
90 Popen,
91 PostProcessingError,
92 preferredencoding,
93 prepend_extension,
94 register_socks_protocols,
95 RejectedVideoReached,
96 render_table,
97 replace_extension,
98 SameFileError,
99 sanitize_filename,
100 sanitize_path,
101 sanitize_url,
102 sanitized_Request,
103 std_headers,
104 STR_FORMAT_RE_TMPL,
105 STR_FORMAT_TYPES,
106 str_or_none,
107 strftime_or_none,
108 subtitles_filename,
109 supports_terminal_sequences,
110 TERMINAL_SEQUENCES,
111 ThrottledDownload,
112 to_high_limit_path,
113 traverse_obj,
114 try_get,
115 UnavailableVideoError,
116 url_basename,
117 variadic,
118 version_tuple,
119 write_json_file,
120 write_string,
121 YoutubeDLCookieProcessor,
122 YoutubeDLHandler,
123 YoutubeDLRedirectHandler,
124 )
125 from .cache import Cache
126 from .extractor import (
127 gen_extractor_classes,
128 get_info_extractor,
129 _LAZY_LOADER,
130 _PLUGIN_CLASSES as plugin_extractors
131 )
132 from .extractor.openload import PhantomJSwrapper
133 from .downloader import (
134 FFmpegFD,
135 get_suitable_downloader,
136 shorten_protocol_name
137 )
138 from .downloader.rtmp import rtmpdump_version
139 from .postprocessor import (
140 get_postprocessor,
141 EmbedThumbnailPP,
142 FFmpegFixupDurationPP,
143 FFmpegFixupM3u8PP,
144 FFmpegFixupM4aPP,
145 FFmpegFixupStretchedPP,
146 FFmpegFixupTimestampPP,
147 FFmpegMergerPP,
148 FFmpegPostProcessor,
149 MoveFilesAfterDownloadPP,
150 _PLUGIN_CLASSES as plugin_postprocessors
151 )
152 from .update import detect_variant
153 from .version import __version__
154
155 if compat_os_name == 'nt':
156 import ctypes
157
158
159 class YoutubeDL(object):
160 """YoutubeDL class.
161
162 YoutubeDL objects are the ones responsible of downloading the
163 actual video file and writing it to disk if the user has requested
164 it, among some other tasks. In most cases there should be one per
165 program. As, given a video URL, the downloader doesn't know how to
166 extract all the needed information, task that InfoExtractors do, it
167 has to pass the URL to one of them.
168
169 For this, YoutubeDL objects have a method that allows
170 InfoExtractors to be registered in a given order. When it is passed
171 a URL, the YoutubeDL object handles it to the first InfoExtractor it
172 finds that reports being able to handle it. The InfoExtractor extracts
173 all the information about the video or videos the URL refers to, and
174 YoutubeDL process the extracted information, possibly using a File
175 Downloader to download the video.
176
177 YoutubeDL objects accept a lot of parameters. In order not to saturate
178 the object constructor with arguments, it receives a dictionary of
179 options instead. These options are available through the params
180 attribute for the InfoExtractors to use. The YoutubeDL also
181 registers itself as the downloader in charge for the InfoExtractors
182 that are added to it, so this is a "mutual registration".
183
184 Available options:
185
186 username: Username for authentication purposes.
187 password: Password for authentication purposes.
188 videopassword: Password for accessing a video.
189 ap_mso: Adobe Pass multiple-system operator identifier.
190 ap_username: Multiple-system operator account username.
191 ap_password: Multiple-system operator account password.
192 usenetrc: Use netrc for authentication instead.
193 verbose: Print additional info to stdout.
194 quiet: Do not print messages to stdout.
195 no_warnings: Do not print out anything for warnings.
196 forceprint: A list of templates to force print
197 forceurl: Force printing final URL. (Deprecated)
198 forcetitle: Force printing title. (Deprecated)
199 forceid: Force printing ID. (Deprecated)
200 forcethumbnail: Force printing thumbnail URL. (Deprecated)
201 forcedescription: Force printing description. (Deprecated)
202 forcefilename: Force printing final filename. (Deprecated)
203 forceduration: Force printing duration. (Deprecated)
204 forcejson: Force printing info_dict as JSON.
205 dump_single_json: Force printing the info_dict of the whole playlist
206 (or video) as a single JSON line.
207 force_write_download_archive: Force writing download archive regardless
208 of 'skip_download' or 'simulate'.
209 simulate: Do not download the video files. If unset (or None),
210 simulate only if listsubtitles, listformats or list_thumbnails is used
211 format: Video format code. see "FORMAT SELECTION" for more details.
212 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
213     ignore_no_formats_error: Ignore "No video formats" error. Useful for
214 extracting metadata even if the video is not actually
215 available for download (experimental)
216 format_sort: How to sort the video formats. see "Sorting Formats"
217 for more details.
218 format_sort_force: Force the given format_sort. see "Sorting Formats"
219 for more details.
220 allow_multiple_video_streams: Allow multiple video streams to be merged
221 into a single file
222 allow_multiple_audio_streams: Allow multiple audio streams to be merged
223 into a single file
224 check_formats Whether to test if the formats are downloadable.
225 Can be True (check all), False (check none)
226 or None (check only if requested by extractor)
227 paths: Dictionary of output paths. The allowed keys are 'home'
228 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
229 outtmpl: Dictionary of templates for output names. Allowed keys
230 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
231 For compatibility with youtube-dl, a single string can also be used
232 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
233 restrictfilenames: Do not allow "&" and spaces in file names
234 trim_file_name: Limit length of filename (extension excluded)
235 windowsfilenames: Force the filenames to be windows compatible
236 ignoreerrors: Do not stop on download/postprocessing errors.
237 Can be 'only_download' to ignore only download errors.
238 Default is 'only_download' for CLI, but False for API
239 skip_playlist_after_errors: Number of allowed failures until the rest of
240 the playlist is skipped
241 force_generic_extractor: Force downloader to use the generic extractor
242 overwrites: Overwrite all video and metadata files if True,
243 overwrite only non-video files if None
244 and don't overwrite any file if False
245 For compatibility with youtube-dl,
246 "nooverwrites" may also be used instead
247 playliststart: Playlist item to start at.
248 playlistend: Playlist item to end at.
249 playlist_items: Specific indices of playlist to download.
250 playlistreverse: Download playlist items in reverse order.
251 playlistrandom: Download playlist items in random order.
252 matchtitle: Download only matching titles.
253 rejecttitle: Reject downloads for matching titles.
254 logger: Log messages to a logging.Logger instance.
255 logtostderr: Log messages to stderr instead of stdout.
256 consoletitle: Display progress in console window's titlebar.
257 writedescription: Write the video description to a .description file
258 writeinfojson: Write the video description to a .info.json file
259 clean_infojson: Remove private fields from the infojson
260 getcomments: Extract video comments. This will not be written to disk
261 unless writeinfojson is also given
262 writeannotations: Write the video annotations to a .annotations.xml file
263 writethumbnail: Write the thumbnail image to a file
264 allow_playlist_files: Whether to write playlists' description, infojson etc
265 also to disk when using the 'write*' options
266 write_all_thumbnails: Write all thumbnail formats to files
267 writelink: Write an internet shortcut file, depending on the
268 current platform (.url/.webloc/.desktop)
269 writeurllink: Write a Windows internet shortcut file (.url)
270 writewebloclink: Write a macOS internet shortcut file (.webloc)
271 writedesktoplink: Write a Linux internet shortcut file (.desktop)
272 writesubtitles: Write the video subtitles to a file
273 writeautomaticsub: Write the automatically generated subtitles to a file
274 allsubtitles: Deprecated - Use subtitleslangs = ['all']
275 Downloads all the subtitles of the video
276 (requires writesubtitles or writeautomaticsub)
277 listsubtitles: Lists all available subtitles for the video
278 subtitlesformat: The format code for subtitles
279 subtitleslangs: List of languages of the subtitles to download (can be regex).
280 The list may contain "all" to refer to all the available
281 subtitles. The language can be prefixed with a "-" to
282 exclude it from the requested languages. Eg: ['all', '-live_chat']
283 keepvideo: Keep the video file after post-processing
284 daterange: A DateRange object, download only if the upload_date is in the range.
285 skip_download: Skip the actual download of the video file
286 cachedir: Location of the cache files in the filesystem.
287 False to disable filesystem cache.
288 noplaylist: Download single video instead of a playlist if in doubt.
289 age_limit: An integer representing the user's age in years.
290 Unsuitable videos for the given age are skipped.
291 min_views: An integer representing the minimum view count the video
292 must have in order to not be skipped.
293 Videos without view count information are always
294 downloaded. None for no limit.
295 max_views: An integer representing the maximum view count.
296 Videos that are more popular than that are not
297 downloaded.
298 Videos without view count information are always
299 downloaded. None for no limit.
300 download_archive: File name of a file where all downloads are recorded.
301 Videos already present in the file are not downloaded
302 again.
303 break_on_existing: Stop the download process after attempting to download a
304 file that is in the archive.
305 break_on_reject: Stop the download process when encountering a video that
306 has been filtered out.
307 cookiefile: File name where cookies should be read from and dumped to
308 cookiesfrombrowser: A tuple containing the name of the browser and the profile
309 name/path from where cookies are loaded.
310 Eg: ('chrome', ) or ('vivaldi', 'default')
311 nocheckcertificate:Do not verify SSL certificates
312 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
313 At the moment, this is only supported by YouTube.
314 proxy: URL of the proxy server to use
315 geo_verification_proxy: URL of the proxy to use for IP address verification
316 on geo-restricted sites.
317 socket_timeout: Time to wait for unresponsive hosts, in seconds
318 bidi_workaround: Work around buggy terminals without bidirectional text
319                        support, using fribidi
320 debug_printtraffic:Print out sent and received HTTP traffic
321 include_ads: Download ads as well
322 default_search: Prepend this string if an input url is not valid.
323 'auto' for elaborate guessing
324 encoding: Use this encoding instead of the system-specified.
325 extract_flat: Do not resolve URLs, return the immediate result.
326 Pass in 'in_playlist' to only show this behavior for
327 playlist items.
328 postprocessors: A list of dictionaries, each with an entry
329 * key: The name of the postprocessor. See
330 yt_dlp/postprocessor/__init__.py for a list.
331 * when: When to run the postprocessor. Can be one of
332 pre_process|before_dl|post_process|after_move.
333 Assumed to be 'post_process' if not given
334 post_hooks: Deprecated - Register a custom postprocessor instead
335 A list of functions that get called as the final step
336 for each video file, after all postprocessors have been
337 called. The filename will be passed as the only argument.
338 progress_hooks: A list of functions that get called on download
339 progress, with a dictionary with the entries
340 * status: One of "downloading", "error", or "finished".
341 Check this first and ignore unknown values.
342 * info_dict: The extracted info_dict
343
344 If status is one of "downloading", or "finished", the
345 following properties may also be present:
346 * filename: The final filename (always present)
347 * tmpfilename: The filename we're currently writing to
348 * downloaded_bytes: Bytes on disk
349 * total_bytes: Size of the whole file, None if unknown
350 * total_bytes_estimate: Guess of the eventual file size,
351 None if unavailable.
352 * elapsed: The number of seconds since download started.
353 * eta: The estimated time in seconds, None if unknown
354 * speed: The download speed in bytes/second, None if
355 unknown
356 * fragment_index: The counter of the currently
357 downloaded video fragment.
358 * fragment_count: The number of fragments (= individual
359 files that will be merged)
360
361 Progress hooks are guaranteed to be called at least once
362 (with status "finished") if the download is successful.
363 postprocessor_hooks: A list of functions that get called on postprocessing
364 progress, with a dictionary with the entries
365 * status: One of "started", "processing", or "finished".
366 Check this first and ignore unknown values.
367 * postprocessor: Name of the postprocessor
368 * info_dict: The extracted info_dict
369
370 Progress hooks are guaranteed to be called at least twice
371 (with status "started" and "finished") if the processing is successful.
372 merge_output_format: Extension to use when merging formats.
373 final_ext: Expected final extension; used to detect when the file was
374 already downloaded and converted. "merge_output_format" is
375 replaced by this extension when given
376 fixup: Automatically correct known faults of the file.
377 One of:
378 - "never": do nothing
379 - "warn": only emit a warning
380 - "detect_or_warn": check whether we can do anything
381 about it, warn otherwise (default)
382 source_address: Client-side IP address to bind to.
383 call_home: Boolean, true iff we are allowed to contact the
384 yt-dlp servers for debugging. (BROKEN)
385 sleep_interval_requests: Number of seconds to sleep between requests
386 during extraction
387 sleep_interval: Number of seconds to sleep before each download when
388 used alone or a lower bound of a range for randomized
389 sleep before each download (minimum possible number
390 of seconds to sleep) when used along with
391 max_sleep_interval.
392 max_sleep_interval:Upper bound of a range for randomized sleep before each
393 download (maximum possible number of seconds to sleep).
394 Must only be used along with sleep_interval.
395 Actual sleep time will be a random float from range
396 [sleep_interval; max_sleep_interval].
397 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
398 listformats: Print an overview of available video formats and exit.
399 list_thumbnails: Print a table of all thumbnails and exit.
400 match_filter: A function that gets called with the info_dict of
401 every video.
402 If it returns a message, the video is ignored.
403 If it returns None, the video is downloaded.
404 match_filter_func in utils.py is one example for this.
405 no_color: Do not emit color codes in output.
406 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
407 HTTP header
408 geo_bypass_country:
409 Two-letter ISO 3166-2 country code that will be used for
410 explicit geographic restriction bypassing via faking
411 X-Forwarded-For HTTP header
412 geo_bypass_ip_block:
413 IP range in CIDR notation that will be used similarly to
414 geo_bypass_country
415
416 The following options determine which downloader is picked:
417 external_downloader: A dictionary of protocol keys and the executable of the
418 external downloader to use for it. The allowed protocols
419 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
420 Set the value to 'native' to use the native downloader
421 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
422 or {'m3u8': 'ffmpeg'} instead.
423 Use the native HLS downloader instead of ffmpeg/avconv
424 if True, otherwise use ffmpeg/avconv if False, otherwise
425 use downloader suggested by extractor if None.
426 compat_opts: Compatibility options. See "Differences in default behavior".
427 The following options do not work when used through the API:
428 filename, abort-on-error, multistreams, no-live-chat, format-sort
429 no-clean-infojson, no-playlist-metafiles, no-keep-subs.
430 Refer __init__.py for their implementation
431 progress_template: Dictionary of templates for progress outputs.
432 Allowed keys are 'download', 'postprocess',
433 'download-title' (console title) and 'postprocess-title'.
434 The template is mapped on a dictionary with keys 'progress' and 'info'
435
436 The following parameters are not used by YoutubeDL itself, they are used by
437 the downloader (see yt_dlp/downloader/common.py):
438 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
439 max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
440 noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
441 external_downloader_args.
442
443 The following options are used by the post processors:
444 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
445 otherwise prefer ffmpeg. (avconv support is deprecated)
446 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
447 to the binary or its containing directory.
448 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
449 and a list of additional command-line arguments for the
450 postprocessor/executable. The dict can also have "PP+EXE" keys
451 which are used when the given exe is used by the given PP.
452 Use 'default' as the name for arguments to passed to all PP
453 For compatibility with youtube-dl, a single list of args
454 can also be used
455
456 The following options are used by the extractors:
457 extractor_retries: Number of times to retry for known errors
458 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
459 hls_split_discontinuity: Split HLS playlists to different formats at
460 discontinuities such as ad breaks (default: False)
461 extractor_args: A dictionary of arguments to be passed to the extractors.
462 See "EXTRACTOR ARGUMENTS" for details.
463 Eg: {'youtube': {'skip': ['dash', 'hls']}}
464 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
465 If True (default), DASH manifests and related
466 data will be downloaded and processed by extractor.
467 You can reduce network I/O by disabling it if you don't
468 care about DASH. (only for youtube)
469 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
470 If True (default), HLS manifests and related
471 data will be downloaded and processed by extractor.
472 You can reduce network I/O by disabling it if you don't
473 care about HLS. (only for youtube)
474 """
475
    # Info-dict fields expected to hold numeric values; presumably consulted
    # when sanitizing/filtering metadata elsewhere in this class — the
    # consumers are not in view here, so verify before relying on this note.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    ))

    # File extensions grouped by track kind, used by format selection
    _format_selection_exts = {
        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
        'video': {'mp4', 'flv', 'webm', '3gp'},
        'storyboards': {'mhtml'},
    }

    # Class-level defaults; each is re-assigned per instance in __init__
    params = None
    _ies = {}
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    _printed_messages = set()
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
502
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        @param params     Dictionary of options (see the class docstring);
                          stored as self.params and shared with extractors
        @param auto_init  Whether to load the default extractors and print header (if verbose).
                          Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        # Per-instance state (shadows the class-level defaults)
        self._ies = {}
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr instead of stdout when logtostderr is set
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        windows_enable_vt_mode()
        # FIXME: This will break if we ever print color to stdout
        self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file)

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._color_text("DO NOT", "red")} open a bug report')

        def check_deprecated(param, option, suggestion):
            # Warn when a deprecated option is present; return whether it was set
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        # cn_verification_proxy falls back into geo_verification_proxy if the latter is unset
        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if params.get('bidi_workaround', False):
            # Pipe output through an external bidi reordering tool (bidiv or fribidi)
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    # bidiv unavailable: fall back to fribidi
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            None if self.params.get('format') is None
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register the configured postprocessors
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            pp_class = get_postprocessor(pp_def.pop('key'))
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                # A missing archive file is fine; anything else is a real error
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))
649
650 def warn_if_short_id(self, argv):
651 # short YouTube ID starting with dash?
652 idxs = [
653 i for i, a in enumerate(argv)
654 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
655 if idxs:
656 correct_argv = (
657 ['yt-dlp']
658 + [a for i, a in enumerate(argv) if i not in idxs]
659 + ['--'] + [argv[i] for i in idxs]
660 )
661 self.report_warning(
662 'Long argument string detected. '
663 'Use -- to separate parameters and URLs, like this:\n%s' %
664 args_to_str(correct_argv))
665
666 def add_info_extractor(self, ie):
667 """Add an InfoExtractor object to the end of the list."""
668 ie_key = ie.ie_key()
669 self._ies[ie_key] = ie
670 if not isinstance(ie, type):
671 self._ies_instances[ie_key] = ie
672 ie.set_downloader(self)
673
674 def _get_info_extractor_class(self, ie_key):
675 ie = self._ies.get(ie_key)
676 if ie is None:
677 ie = get_info_extractor(ie_key)
678 self.add_info_extractor(ie)
679 return ie
680
681 def get_info_extractor(self, ie_key):
682 """
683 Get an instance of an IE with name ie_key, it will try to get one from
684 the _ies list, if there's no instance it will create a new one and add
685 it to the extractor list.
686 """
687 ie = self._ies_instances.get(ie_key)
688 if ie is None:
689 ie = get_info_extractor(ie_key)()
690 self.add_info_extractor(ie)
691 return ie
692
693 def add_default_info_extractors(self):
694 """
695 Add the InfoExtractors returned by gen_extractors to the end of the list
696 """
697 for ie in gen_extractor_classes():
698 self.add_info_extractor(ie)
699
700 def add_post_processor(self, pp, when='post_process'):
701 """Add a PostProcessor object to the end of the chain."""
702 self._pps[when].append(pp)
703 pp.set_downloader(self)
704
705 def add_post_hook(self, ph):
706 """Add the post hook"""
707 self._post_hooks.append(ph)
708
709 def add_progress_hook(self, ph):
710 """Add the download progress hook"""
711 self._progress_hooks.append(ph)
712
713 def add_postprocessor_hook(self, ph):
714 """Add the postprocessing progress hook"""
715 self._postprocessor_hooks.append(ph)
716
717 def _bidi_workaround(self, message):
718 if not hasattr(self, '_output_channel'):
719 return message
720
721 assert hasattr(self, '_output_process')
722 assert isinstance(message, compat_str)
723 line_count = message.count('\n') + 1
724 self._output_process.stdin.write((message + '\n').encode('utf-8'))
725 self._output_process.stdin.flush()
726 res = ''.join(self._output_channel.readline().decode('utf-8')
727 for _ in range(line_count))
728 return res[:-len('\n')]
729
730 def _write_string(self, message, out=None, only_once=False):
731 if only_once:
732 if message in self._printed_messages:
733 return
734 self._printed_messages.add(message)
735 write_string(message, out=out, encoding=self.params.get('encoding'))
736
737 def to_stdout(self, message, skip_eol=False, quiet=False):
738 """Print message to stdout"""
739 if self.params.get('logger'):
740 self.params['logger'].debug(message)
741 elif not quiet or self.params.get('verbose'):
742 self._write_string(
743 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
744 self._err_file if quiet else self._screen_file)
745
746 def to_stderr(self, message, only_once=False):
747 """Print message to stderr"""
748 assert isinstance(message, compat_str)
749 if self.params.get('logger'):
750 self.params['logger'].error(message)
751 else:
752 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
753
754 def to_console_title(self, message):
755 if not self.params.get('consoletitle', False):
756 return
757 if compat_os_name == 'nt':
758 if ctypes.windll.kernel32.GetConsoleWindow():
759 # c_wchar_p() might not be necessary if `message` is
760 # already of type unicode()
761 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
762 elif 'TERM' in os.environ:
763 self._write_string('\033]0;%s\007' % message, self._screen_file)
764
765 def save_console_title(self):
766 if not self.params.get('consoletitle', False):
767 return
768 if self.params.get('simulate'):
769 return
770 if compat_os_name != 'nt' and 'TERM' in os.environ:
771 # Save the title on stack
772 self._write_string('\033[22;0t', self._screen_file)
773
774 def restore_console_title(self):
775 if not self.params.get('consoletitle', False):
776 return
777 if self.params.get('simulate'):
778 return
779 if compat_os_name != 'nt' and 'TERM' in os.environ:
780 # Restore the title from stack
781 self._write_string('\033[23;0t', self._screen_file)
782
783 def __enter__(self):
784 self.save_console_title()
785 return self
786
787 def __exit__(self, *args):
788 self.restore_console_title()
789
790 if self.params.get('cookiefile') is not None:
791 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
792
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # The active exception may itself carry a wrapped exc_info
                    # (e.g. a DownloadError) -- include that traceback first
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack instead
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not self.params.get('ignoreerrors'):
            # Re-raise as DownloadError, preferring the wrapped exc_info when present
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are being ignored: record failure via the process return code
        self._download_retcode = 1
823
824 def to_screen(self, message, skip_eol=False):
825 """Print message to stdout if not in quiet mode"""
826 self.to_stdout(
827 message, skip_eol, quiet=self.params.get('quiet', False))
828
829 def _color_text(self, text, color):
830 if self.params.get('no_color'):
831 return text
832 return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}'
833
834 def report_warning(self, message, only_once=False):
835 '''
836 Print the message to stderr, it will be prefixed with 'WARNING:'
837 If stderr is a tty file the 'WARNING:' will be colored
838 '''
839 if self.params.get('logger') is not None:
840 self.params['logger'].warning(message)
841 else:
842 if self.params.get('no_warnings'):
843 return
844 self.to_stderr(f'{self._color_text("WARNING:", "yellow")} {message}', only_once)
845
846 def report_error(self, message, tb=None):
847 '''
848 Do the same as trouble, but prefixes the message with 'ERROR:', colored
849 in red if stderr is a tty file.
850 '''
851 self.trouble(f'{self._color_text("ERROR:", "red")} {message}', tb)
852
853 def write_debug(self, message, only_once=False):
854 '''Log debug message or Print message to stderr'''
855 if not self.params.get('verbose', False):
856 return
857 message = '[debug] %s' % message
858 if self.params.get('logger'):
859 self.params['logger'].debug(message)
860 else:
861 self.to_stderr(message, only_once)
862
863 def report_file_already_downloaded(self, file_name):
864 """Report file has already been fully downloaded."""
865 try:
866 self.to_screen('[download] %s has already been downloaded' % file_name)
867 except UnicodeEncodeError:
868 self.to_screen('[download] The file has already been downloaded')
869
870 def report_file_delete(self, file_name):
871 """Report that existing file will be deleted."""
872 try:
873 self.to_screen('Deleting existing file %s' % file_name)
874 except UnicodeEncodeError:
875 self.to_screen('Deleting existing file')
876
877 def raise_no_formats(self, info, forced=False):
878 has_drm = info.get('__has_drm')
879 msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
880 expected = self.params.get('ignore_no_formats_error')
881 if forced or not expected:
882 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
883 expected=has_drm or expected)
884 else:
885 self.report_warning(msg)
886
887 def parse_outtmpl(self):
888 outtmpl_dict = self.params.get('outtmpl', {})
889 if not isinstance(outtmpl_dict, dict):
890 outtmpl_dict = {'default': outtmpl_dict}
891 # Remove spaces in the default template
892 if self.params.get('restrictfilenames'):
893 sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
894 else:
895 sanitize = lambda x: x
896 outtmpl_dict.update({
897 k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
898 if outtmpl_dict.get(k) is None})
899 for key, val in outtmpl_dict.items():
900 if isinstance(val, bytes):
901 self.report_warning(
902 'Parameter outtmpl is bytes, but should be a unicode string. '
903 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
904 return outtmpl_dict
905
906 def get_output_path(self, dir_type='', filename=None):
907 paths = self.params.get('paths', {})
908 assert isinstance(paths, dict)
909 path = os.path.join(
910 expand_path(paths.get('home', '').strip()),
911 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
912 filename or '')
913
914 # Temporary fix for #4787
915 # 'Treat' all problem characters by passing filename through preferredencoding
916 # to workaround encoding issues with subprocess on python2 @ Windows
917 if sys.version_info < (3, 0) and sys.platform == 'win32':
918 path = encodeFilename(path, True).decode(preferredencoding())
919 return sanitize_path(path, force=self.params.get('windowsfilenames'))
920
921 @staticmethod
922 def _outtmpl_expandpath(outtmpl):
923 # expand_path translates '%%' into '%' and '$$' into '$'
924 # correspondingly that is not what we want since we need to keep
925 # '%%' intact for template dict substitution step. Working around
926 # with boundary-alike separator hack.
927 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
928 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
929
930 # outtmpl should be expand_path'ed before template dict substitution
931 # because meta fields may contain env variables we don't want to
932 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
933 # title "Hello $PATH", we don't want `$PATH` to be expanded.
934 return expand_path(outtmpl).replace(sep, '')
935
936 @staticmethod
937 def escape_outtmpl(outtmpl):
938 ''' Escape any remaining strings like %s, %abc% etc. '''
939 return re.sub(
940 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
941 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
942 outtmpl)
943
944 @classmethod
945 def validate_outtmpl(cls, outtmpl):
946 ''' @return None or Exception object '''
947 outtmpl = re.sub(
948 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
949 lambda mobj: f'{mobj.group(0)[:-1]}s',
950 cls._outtmpl_expandpath(outtmpl))
951 try:
952 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
953 return None
954 except ValueError as err:
955 return err
956
957 @staticmethod
958 def _copy_infodict(info_dict):
959 info_dict = dict(info_dict)
960 for key in ('__original_infodict', '__postprocessors'):
961 info_dict.pop(key, None)
962 return info_dict
963
964 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
965 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
966 info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
967
968 info_dict = self._copy_infodict(info_dict)
969 info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
970 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
971 if info_dict.get('duration', None) is not None
972 else None)
973 info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
974 if info_dict.get('resolution') is None:
975 info_dict['resolution'] = self.format_resolution(info_dict, default=None)
976
977 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
978 # of %(field)s to %(field)0Nd for backward compatibility
979 field_size_compat_map = {
980 'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
981 'playlist_autonumber': len(str(info_dict.get('n_entries') or '')),
982 'autonumber': self.params.get('autonumber_size') or 5,
983 }
984
985 TMPL_DICT = {}
986 EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
987 MATH_FUNCTIONS = {
988 '+': float.__add__,
989 '-': float.__sub__,
990 }
991 # Field is of the form key1.key2...
992 # where keys (except first) can be string, int or slice
993 FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
994 MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
995 MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
996 INTERNAL_FORMAT_RE = re.compile(r'''(?x)
997 (?P<negate>-)?
998 (?P<fields>{field})
999 (?P<maths>(?:{math_op}{math_field})*)
1000 (?:>(?P<strf_format>.+?))?
1001 (?P<alternate>(?<!\\),[^|)]+)?
1002 (?:\|(?P<default>.*?))?
1003 $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
1004
1005 def _traverse_infodict(k):
1006 k = k.split('.')
1007 if k[0] == '':
1008 k.pop(0)
1009 return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
1010
1011 def get_value(mdict):
1012 # Object traversal
1013 value = _traverse_infodict(mdict['fields'])
1014 # Negative
1015 if mdict['negate']:
1016 value = float_or_none(value)
1017 if value is not None:
1018 value *= -1
1019 # Do maths
1020 offset_key = mdict['maths']
1021 if offset_key:
1022 value = float_or_none(value)
1023 operator = None
1024 while offset_key:
1025 item = re.match(
1026 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1027 offset_key).group(0)
1028 offset_key = offset_key[len(item):]
1029 if operator is None:
1030 operator = MATH_FUNCTIONS[item]
1031 continue
1032 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1033 offset = float_or_none(item)
1034 if offset is None:
1035 offset = float_or_none(_traverse_infodict(item))
1036 try:
1037 value = operator(value, multiplier * offset)
1038 except (TypeError, ZeroDivisionError):
1039 return None
1040 operator = None
1041 # Datetime formatting
1042 if mdict['strf_format']:
1043 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1044
1045 return value
1046
1047 na = self.params.get('outtmpl_na_placeholder', 'NA')
1048
1049 def _dumpjson_default(obj):
1050 if isinstance(obj, (set, LazyList)):
1051 return list(obj)
1052 raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
1053
1054 def create_key(outer_mobj):
1055 if not outer_mobj.group('has_key'):
1056 return outer_mobj.group(0)
1057 key = outer_mobj.group('key')
1058 mobj = re.match(INTERNAL_FORMAT_RE, key)
1059 initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
1060 value, default = None, na
1061 while mobj:
1062 mobj = mobj.groupdict()
1063 default = mobj['default'] if mobj['default'] is not None else default
1064 value = get_value(mobj)
1065 if value is None and mobj['alternate']:
1066 mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1067 else:
1068 break
1069
1070 fmt = outer_mobj.group('format')
1071 if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1072 fmt = '0{:d}d'.format(field_size_compat_map[key])
1073
1074 value = default if value is None else value
1075
1076 str_fmt = f'{fmt[:-1]}s'
1077 if fmt[-1] == 'l': # list
1078 delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
1079 value, fmt = delim.join(variadic(value)), str_fmt
1080 elif fmt[-1] == 'j': # json
1081 value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
1082 elif fmt[-1] == 'q': # quoted
1083 value, fmt = compat_shlex_quote(str(value)), str_fmt
1084 elif fmt[-1] == 'B': # bytes
1085 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1086 value, fmt = value.decode('utf-8', 'ignore'), 's'
1087 elif fmt[-1] == 'U': # unicode normalized
1088 opts = outer_mobj.group('conversion') or ''
1089 value, fmt = unicodedata.normalize(
1090 # "+" = compatibility equivalence, "#" = NFD
1091 'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
1092 value), str_fmt
1093 elif fmt[-1] == 'c':
1094 if value:
1095 value = str(value)[0]
1096 else:
1097 fmt = str_fmt
1098 elif fmt[-1] not in 'rs': # numeric
1099 value = float_or_none(value)
1100 if value is None:
1101 value, fmt = default, 's'
1102
1103 if sanitize:
1104 if fmt[-1] == 'r':
1105 # If value is an object, sanitize might convert it to a string
1106 # So we convert it to repr first
1107 value, fmt = repr(value), str_fmt
1108 if fmt[-1] in 'csr':
1109 value = sanitize(initial_field, value)
1110
1111 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1112 TMPL_DICT[key] = value
1113 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1114
1115 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1116
1117 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1118 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1119 return self.escape_outtmpl(outtmpl) % info_dict
1120
    def _prepare_filename(self, info_dict, tmpl_type='default'):
        """Render the output template of *tmpl_type* for *info_dict*.

        Returns the filename (without the --paths directory applied), or
        None when the template could not be evaluated.
        """
        try:
            # Per-field sanitizer: id-like fields keep more characters
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
            filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)

            # Some template types (e.g. infojson/description) force an extension
            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if filename and force_ext is not None:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                # NOTE(review): rsplit('.') splits on EVERY dot, and only the
                # last two groups are re-joined — middle dot-groups of a
                # multi-dot name are dropped; confirm this is intended
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
1148
1149 def prepare_filename(self, info_dict, dir_type='', warn=False):
1150 """Generate the output filename."""
1151
1152 filename = self._prepare_filename(info_dict, dir_type or 'default')
1153 if not filename and dir_type not in ('', 'temp'):
1154 return ''
1155
1156 if warn:
1157 if not self.params.get('paths'):
1158 pass
1159 elif filename == '-':
1160 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1161 elif os.path.isabs(filename):
1162 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1163 if filename == '-' or not filename:
1164 return filename
1165
1166 return self.get_output_path(dir_type, filename)
1167
    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """Decide whether a video should be downloaded.

        Returns None if the file should be downloaded; otherwise a string
        describing why it is skipped. May raise ExistingVideoReached /
        RejectedVideoReached when the corresponding break_on_* option is set.

        @param incomplete  info_dict may be partial (flat playlist entry)
        @param silent      do not print the skip reason to the screen
        """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            # Returns a skip-reason string, or None to accept the video
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is not None:
                    return ret
            return None

        # The archive check takes precedence over the filters, and each has
        # its own break-on option / control-flow exception
        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason
1224
1225 @staticmethod
1226 def add_extra_info(info_dict, extra_info):
1227 '''Set the keys from extra_info in info dict if they are missing'''
1228 for key, value in extra_info.items():
1229 info_dict.setdefault(key, value)
1230
    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if extra_info is None:
            extra_info = {}

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            # A hint was given: consider only that single extractor
            ies = {ie_key: self._get_info_extractor_class(ie_key)}
        else:
            ies = self._ies

        # First extractor that declares the URL suitable wins
        for ie_key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                # Already archived: skip extraction (function returns None)
                break
            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
        else:
            # for-else: no extractor accepted the URL
            self.report_error('no suitable InfoExtractor for URL %s' % url)
1275
    def __handle_extraction_exceptions(func):
        # Decorator (applied to YoutubeDL methods) that converts the
        # exceptions extraction may raise into reported errors, retries,
        # or re-raised control-flow signals.
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except ThrottledDownload:
                self.to_stderr('\r')
                self.report_warning('The download speed is below throttle limit. Re-extracting data')
                # NOTE(review): unbounded retry — re-extracts for as long as
                # the download stays below the throttle limit
                return wrapper(self, *args, **kwargs)
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
                # Control-flow exceptions: outer callers rely on catching these
                raise
            except Exception as e:
                if self.params.get('ignoreerrors'):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper
1302
    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        # Run the actual extraction with extractor *ie* and hand the result
        # over to process_ie_result() (unless *process* is False)
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if extra_info.get('original_url'):
            ie_result.setdefault('original_url', extra_info['original_url'])
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result
1321
1322 def add_default_extra_info(self, ie_result, ie, url):
1323 if url is not None:
1324 self.add_extra_info(ie_result, {
1325 'webpage_url': url,
1326 'original_url': url,
1327 'webpage_url_basename': url_basename(url),
1328 })
1329 if ie is not None:
1330 self.add_extra_info(ie_result, {
1331 'extractor': ie.IE_NAME,
1332 'extractor_key': ie.ie_key(),
1333 })
1334
1335 def process_ie_result(self, ie_result, download=True, extra_info=None):
1336 """
1337 Take the result of the ie(may be modified) and resolve all unresolved
1338 references (URLs, playlist items).
1339
1340 It will also download the videos if 'download'.
1341 Returns the resolved ie_result.
1342 """
1343 if extra_info is None:
1344 extra_info = {}
1345 result_type = ie_result.get('_type', 'video')
1346
1347 if result_type in ('url', 'url_transparent'):
1348 ie_result['url'] = sanitize_url(ie_result['url'])
1349 if ie_result.get('original_url'):
1350 extra_info.setdefault('original_url', ie_result['original_url'])
1351
1352 extract_flat = self.params.get('extract_flat', False)
1353 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1354 or extract_flat is True):
1355 info_copy = ie_result.copy()
1356 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1357 if ie and not ie_result.get('id'):
1358 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1359 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1360 self.add_extra_info(info_copy, extra_info)
1361 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1362 if self.params.get('force_write_download_archive', False):
1363 self.record_download_archive(info_copy)
1364 return ie_result
1365
1366 if result_type == 'video':
1367 self.add_extra_info(ie_result, extra_info)
1368 ie_result = self.process_video_result(ie_result, download=download)
1369 additional_urls = (ie_result or {}).get('additional_urls')
1370 if additional_urls:
1371 # TODO: Improve MetadataParserPP to allow setting a list
1372 if isinstance(additional_urls, compat_str):
1373 additional_urls = [additional_urls]
1374 self.to_screen(
1375 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1376 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1377 ie_result['additional_entries'] = [
1378 self.extract_info(
1379 url, download, extra_info,
1380 force_generic_extractor=self.params.get('force_generic_extractor'))
1381 for url in additional_urls
1382 ]
1383 return ie_result
1384 elif result_type == 'url':
1385 # We have to add extra_info to the results because it may be
1386 # contained in a playlist
1387 return self.extract_info(
1388 ie_result['url'], download,
1389 ie_key=ie_result.get('ie_key'),
1390 extra_info=extra_info)
1391 elif result_type == 'url_transparent':
1392 # Use the information from the embedding page
1393 info = self.extract_info(
1394 ie_result['url'], ie_key=ie_result.get('ie_key'),
1395 extra_info=extra_info, download=False, process=False)
1396
1397 # extract_info may return None when ignoreerrors is enabled and
1398 # extraction failed with an error, don't crash and return early
1399 # in this case
1400 if not info:
1401 return info
1402
1403 force_properties = dict(
1404 (k, v) for k, v in ie_result.items() if v is not None)
1405 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1406 if f in force_properties:
1407 del force_properties[f]
1408 new_result = info.copy()
1409 new_result.update(force_properties)
1410
1411 # Extracted info may not be a video result (i.e.
1412 # info.get('_type', 'video') != video) but rather an url or
1413 # url_transparent. In such cases outer metadata (from ie_result)
1414 # should be propagated to inner one (info). For this to happen
1415 # _type of info should be overridden with url_transparent. This
1416 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1417 if new_result.get('_type') == 'url':
1418 new_result['_type'] = 'url_transparent'
1419
1420 return self.process_ie_result(
1421 new_result, download=download, extra_info=extra_info)
1422 elif result_type in ('playlist', 'multi_video'):
1423 # Protect from infinite recursion due to recursively nested playlists
1424 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1425 webpage_url = ie_result['webpage_url']
1426 if webpage_url in self._playlist_urls:
1427 self.to_screen(
1428 '[download] Skipping already downloaded playlist: %s'
1429 % ie_result.get('title') or ie_result.get('id'))
1430 return
1431
1432 self._playlist_level += 1
1433 self._playlist_urls.add(webpage_url)
1434 self._sanitize_thumbnails(ie_result)
1435 try:
1436 return self.__process_playlist(ie_result, download)
1437 finally:
1438 self._playlist_level -= 1
1439 if not self._playlist_level:
1440 self._playlist_urls.clear()
1441 elif result_type == 'compat_list':
1442 self.report_warning(
1443 'Extractor %s returned a compat_list result. '
1444 'It needs to be updated.' % ie_result.get('extractor'))
1445
1446 def _fixup(r):
1447 self.add_extra_info(r, {
1448 'extractor': ie_result['extractor'],
1449 'webpage_url': ie_result['webpage_url'],
1450 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1451 'extractor_key': ie_result['extractor_key'],
1452 })
1453 return r
1454 ie_result['entries'] = [
1455 self.process_ie_result(_fixup(r), download, extra_info)
1456 for r in ie_result['entries']
1457 ]
1458 return ie_result
1459 else:
1460 raise Exception('Invalid result type: %s' % result_type)
1461
1462 def _ensure_dir_exists(self, path):
1463 return make_dir(path, self.report_error)
1464
1465 def __process_playlist(self, ie_result, download):
1466 # We process each entry in the playlist
1467 playlist = ie_result.get('title') or ie_result.get('id')
1468 self.to_screen('[download] Downloading playlist: %s' % playlist)
1469
1470 if 'entries' not in ie_result:
1471 raise EntryNotInPlaylist()
1472 incomplete_entries = bool(ie_result.get('requested_entries'))
1473 if incomplete_entries:
1474 def fill_missing_entries(entries, indexes):
1475 ret = [None] * max(*indexes)
1476 for i, entry in zip(indexes, entries):
1477 ret[i - 1] = entry
1478 return ret
1479 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1480
1481 playlist_results = []
1482
1483 playliststart = self.params.get('playliststart', 1)
1484 playlistend = self.params.get('playlistend')
1485 # For backwards compatibility, interpret -1 as whole list
1486 if playlistend == -1:
1487 playlistend = None
1488
1489 playlistitems_str = self.params.get('playlist_items')
1490 playlistitems = None
1491 if playlistitems_str is not None:
1492 def iter_playlistitems(format):
1493 for string_segment in format.split(','):
1494 if '-' in string_segment:
1495 start, end = string_segment.split('-')
1496 for item in range(int(start), int(end) + 1):
1497 yield int(item)
1498 else:
1499 yield int(string_segment)
1500 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1501
1502 ie_entries = ie_result['entries']
1503 msg = (
1504 'Downloading %d videos' if not isinstance(ie_entries, list)
1505 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1506
1507 if isinstance(ie_entries, list):
1508 def get_entry(i):
1509 return ie_entries[i - 1]
1510 else:
1511 if not isinstance(ie_entries, PagedList):
1512 ie_entries = LazyList(ie_entries)
1513
1514 def get_entry(i):
1515 return YoutubeDL.__handle_extraction_exceptions(
1516 lambda self, i: ie_entries[i - 1]
1517 )(self, i)
1518
1519 entries = []
1520 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1521 for i in items:
1522 if i == 0:
1523 continue
1524 if playlistitems is None and playlistend is not None and playlistend < i:
1525 break
1526 entry = None
1527 try:
1528 entry = get_entry(i)
1529 if entry is None:
1530 raise EntryNotInPlaylist()
1531 except (IndexError, EntryNotInPlaylist):
1532 if incomplete_entries:
1533 raise EntryNotInPlaylist()
1534 elif not playlistitems:
1535 break
1536 entries.append(entry)
1537 try:
1538 if entry is not None:
1539 self._match_entry(entry, incomplete=True, silent=True)
1540 except (ExistingVideoReached, RejectedVideoReached):
1541 break
1542 ie_result['entries'] = entries
1543
1544 # Save playlist_index before re-ordering
1545 entries = [
1546 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1547 for i, entry in enumerate(entries, 1)
1548 if entry is not None]
1549 n_entries = len(entries)
1550
1551 if not playlistitems and (playliststart or playlistend):
1552 playlistitems = list(range(playliststart, playliststart + n_entries))
1553 ie_result['requested_entries'] = playlistitems
1554
1555 if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
1556 ie_copy = {
1557 'playlist': playlist,
1558 'playlist_id': ie_result.get('id'),
1559 'playlist_title': ie_result.get('title'),
1560 'playlist_uploader': ie_result.get('uploader'),
1561 'playlist_uploader_id': ie_result.get('uploader_id'),
1562 'playlist_index': 0,
1563 'n_entries': n_entries,
1564 }
1565 ie_copy.update(dict(ie_result))
1566
1567 if self._write_info_json('playlist', ie_result,
1568 self.prepare_filename(ie_copy, 'pl_infojson')) is None:
1569 return
1570 if self._write_description('playlist', ie_result,
1571 self.prepare_filename(ie_copy, 'pl_description')) is None:
1572 return
1573 # TODO: This should be passed to ThumbnailsConvertor if necessary
1574 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1575
1576 if self.params.get('playlistreverse', False):
1577 entries = entries[::-1]
1578 if self.params.get('playlistrandom', False):
1579 random.shuffle(entries)
1580
1581 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1582
1583 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1584 failures = 0
1585 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1586 for i, entry_tuple in enumerate(entries, 1):
1587 playlist_index, entry = entry_tuple
1588 if 'playlist-index' in self.params.get('compat_opts', []):
1589 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1590 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1591 # This __x_forwarded_for_ip thing is a bit ugly but requires
1592 # minimal changes
1593 if x_forwarded_for:
1594 entry['__x_forwarded_for_ip'] = x_forwarded_for
1595 extra = {
1596 'n_entries': n_entries,
1597 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1598 'playlist_index': playlist_index,
1599 'playlist_autonumber': i,
1600 'playlist': playlist,
1601 'playlist_id': ie_result.get('id'),
1602 'playlist_title': ie_result.get('title'),
1603 'playlist_uploader': ie_result.get('uploader'),
1604 'playlist_uploader_id': ie_result.get('uploader_id'),
1605 'extractor': ie_result['extractor'],
1606 'webpage_url': ie_result['webpage_url'],
1607 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1608 'extractor_key': ie_result['extractor_key'],
1609 }
1610
1611 if self._match_entry(entry, incomplete=True) is not None:
1612 continue
1613
1614 entry_result = self.__process_iterable_entry(entry, download, extra)
1615 if not entry_result:
1616 failures += 1
1617 if failures >= max_failures:
1618 self.report_error(
1619 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1620 break
1621 # TODO: skip failed (empty) entries?
1622 playlist_results.append(entry_result)
1623 ie_result['entries'] = playlist_results
1624 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1625 return ie_result
1626
    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        """Process a single playlist entry via process_ie_result.

        Wrapped by __handle_extraction_exceptions (defined elsewhere in the
        class), which presumably converts extraction failures into a falsy
        return value for the playlist loop — confirm against its definition.
        """
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)
1631
1632 def _build_format_filter(self, filter_spec):
1633 " Returns a function to filter the formats according to the filter_spec "
1634
1635 OPERATORS = {
1636 '<': operator.lt,
1637 '<=': operator.le,
1638 '>': operator.gt,
1639 '>=': operator.ge,
1640 '=': operator.eq,
1641 '!=': operator.ne,
1642 }
1643 operator_rex = re.compile(r'''(?x)\s*
1644 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1645 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1646 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1647 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1648 m = operator_rex.fullmatch(filter_spec)
1649 if m:
1650 try:
1651 comparison_value = int(m.group('value'))
1652 except ValueError:
1653 comparison_value = parse_filesize(m.group('value'))
1654 if comparison_value is None:
1655 comparison_value = parse_filesize(m.group('value') + 'B')
1656 if comparison_value is None:
1657 raise ValueError(
1658 'Invalid value %r in format specification %r' % (
1659 m.group('value'), filter_spec))
1660 op = OPERATORS[m.group('op')]
1661
1662 if not m:
1663 STR_OPERATORS = {
1664 '=': operator.eq,
1665 '^=': lambda attr, value: attr.startswith(value),
1666 '$=': lambda attr, value: attr.endswith(value),
1667 '*=': lambda attr, value: value in attr,
1668 }
1669 str_operator_rex = re.compile(r'''(?x)\s*
1670 (?P<key>[a-zA-Z0-9._-]+)\s*
1671 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1672 (?P<value>[a-zA-Z0-9._-]+)\s*
1673 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1674 m = str_operator_rex.fullmatch(filter_spec)
1675 if m:
1676 comparison_value = m.group('value')
1677 str_op = STR_OPERATORS[m.group('op')]
1678 if m.group('negation'):
1679 op = lambda attr, value: not str_op(attr, value)
1680 else:
1681 op = str_op
1682
1683 if not m:
1684 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1685
1686 def _filter(f):
1687 actual_value = f.get(m.group('key'))
1688 if actual_value is None:
1689 return m.group('none_inclusive')
1690 return op(actual_value, comparison_value)
1691 return _filter
1692
1693 def _default_format_spec(self, info_dict, download=True):
1694
1695 def can_merge():
1696 merger = FFmpegMergerPP(self)
1697 return merger.available and merger.can_merge()
1698
1699 prefer_best = (
1700 not self.params.get('simulate')
1701 and download
1702 and (
1703 not can_merge()
1704 or info_dict.get('is_live', False)
1705 or self.outtmpl_dict['default'] == '-'))
1706 compat = (
1707 prefer_best
1708 or self.params.get('allow_multiple_audio_streams', False)
1709 or 'format-spec' in self.params.get('compat_opts', []))
1710
1711 return (
1712 'best/bestvideo+bestaudio' if prefer_best
1713 else 'bestvideo*+bestaudio/best' if not compat
1714 else 'bestvideo+bestaudio/best')
1715
    def build_format_selector(self, format_spec):
        """Compile *format_spec* (e.g. 'bestvideo*+bestaudio/best') into a
        selector function.

        The spec is tokenized with the stdlib tokenizer, parsed into a tree
        of FormatSelector nodes and compiled into a function that takes a
        ctx dict (keys 'formats' and 'incomplete_formats') and yields the
        format dicts chosen for download.
        """
        def syntax_error(note, start):
            # Build (not raise) a SyntaxError pointing at the offending column
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        check_formats = self.params.get('check_formats')

        def _parse_filter(tokens):
            # Consume tokens up to the matching ']' and return the raw
            # filter expression as a single string
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Glue adjacent name/number/other-op tokens back together
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parse of a comma-separated selector list;
            # the inside_* flags decide which operators end the current level
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _merge(formats_pair):
            # Combine two selected formats (each possibly already merged)
            # into one synthetic format dict with 'requested_formats'
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                # Drop storyboard-like formats and surplus audio/video streams
                # when multiple streams of a kind are not allowed
                get_no_more = {'video': False, 'audio': False}
                for (i, fmt_info) in enumerate(formats_info):
                    if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                        formats_info.pop(i)
                        continue
                    for aud_vid in ['audio', 'video']:
                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                            if get_no_more[aud_vid]:
                                formats_info.pop(i)
                                break
                            get_no_more[aud_vid] = True

            if len(formats_info) == 1:
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            output_ext = self.params.get('merge_output_format')
            if not output_ext:
                if the_only_video:
                    output_ext = the_only_video['ext']
                elif the_only_audio and not video_fmts:
                    output_ext = the_only_audio['ext']
                else:
                    output_ext = 'mkv'

            filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(filtered('format')),
                'format_id': '+'.join(filtered('format_id')),
                'ext': output_ext,
                'protocol': '+'.join(map(determine_protocol, formats_info)),
                'language': '+'.join(orderedSet(filtered('language'))),
                'format_note': '+'.join(orderedSet(filtered('format_note'))),
                'filesize_approx': sum(filtered('filesize', 'filesize_approx')),
                'tbr': sum(filtered('tbr', 'vbr', 'abr')),
            }

            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                    'fps': the_only_video.get('fps'),
                    'dynamic_range': the_only_video.get('dynamic_range'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                    'asr': the_only_audio.get('asr'),
                })

            return new_dict

        def _check_formats(formats):
            # When --check-formats is set, keep only formats whose URL
            # survives a small test download; otherwise pass everything through
            if not check_formats:
                yield from formats
                return
            for f in formats:
                self.to_screen('[info] Testing format %s' % f['format_id'])
                temp_file = tempfile.NamedTemporaryFile(
                    suffix='.tmp', delete=False,
                    dir=self.get_output_path('temp') or None)
                temp_file.close()
                try:
                    success, _ = self.dl(temp_file.name, f, test=True)
                except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
                    success = False
                finally:
                    if os.path.exists(temp_file.name):
                        try:
                            os.remove(temp_file.name)
                        except OSError:
                            self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
                if success:
                    yield f
                else:
                    self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

        def _build_selector_function(selector):
            # Recursively compile a selector tree node into a function
            # ctx -> iterator of format dicts
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # deepcopy so each side of '+' filters an untouched ctx
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Not a best/worst atom: match by extension group or format_id
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply '[...]' filters to a copy of the ctx before selecting
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over the token list with one-token push-back,
            # needed by the recursive parser
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
2060
2061 def _calc_headers(self, info_dict):
2062 res = std_headers.copy()
2063
2064 add_headers = info_dict.get('http_headers')
2065 if add_headers:
2066 res.update(add_headers)
2067
2068 cookies = self._calc_cookies(info_dict)
2069 if cookies:
2070 res['Cookie'] = cookies
2071
2072 if 'X-Forwarded-For' not in res:
2073 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2074 if x_forwarded_for_ip:
2075 res['X-Forwarded-For'] = x_forwarded_for_ip
2076
2077 return res
2078
2079 def _calc_cookies(self, info_dict):
2080 pr = sanitized_Request(info_dict['url'])
2081 self.cookiejar.add_cookie_header(pr)
2082 return pr.get_header('Cookie')
2083
2084 def _sanitize_thumbnails(self, info_dict):
2085 thumbnails = info_dict.get('thumbnails')
2086 if thumbnails is None:
2087 thumbnail = info_dict.get('thumbnail')
2088 if thumbnail:
2089 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2090 if thumbnails:
2091 thumbnails.sort(key=lambda t: (
2092 t.get('preference') if t.get('preference') is not None else -1,
2093 t.get('width') if t.get('width') is not None else -1,
2094 t.get('height') if t.get('height') is not None else -1,
2095 t.get('id') if t.get('id') is not None else '',
2096 t.get('url')))
2097
2098 def thumbnail_tester():
2099 def test_thumbnail(t):
2100 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2101 try:
2102 self.urlopen(HEADRequest(t['url']))
2103 except network_exceptions as err:
2104 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2105 return False
2106 return True
2107 return test_thumbnail
2108
2109 for i, t in enumerate(thumbnails):
2110 if t.get('id') is None:
2111 t['id'] = '%d' % i
2112 if t.get('width') and t.get('height'):
2113 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2114 t['url'] = sanitize_url(t['url'])
2115
2116 if self.params.get('check_formats'):
2117 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
2118 else:
2119 info_dict['thumbnails'] = thumbnails
2120
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single extracted video result, run format selection and
        hand every selected format to process_info.

        Returns info_dict updated with the last selected format (kept for
        backwards compatibility).
        """
        assert info_dict.get('_type', 'video') == 'video'

        # 'id' and 'title' are mandatory extractor output
        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field to str, warning about the extractor bug
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce non-numeric values of the known numeric fields to int
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # thumbnails are sorted worst-to-best, so the last is the best one
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        # Derive the date fields from their timestamps when missing
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Reconcile live_status with the is_live/was_live flags, in both directions
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and fill in missing subtitle extensions
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    format['format_id'] = '%s-%d' % (format_id, i)
                if format.get('ext') is None:
                    format['ext'] = determine_ext(format['url']).lower()
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

        # Fill in the remaining derived per-format fields
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # Handle the listing options (-F, --list-thumbnails, --list-subs)
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the selected best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2392
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format"""
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        # Automatic captions never shadow a real subtitle track of the same language
        for lang, cap_info in automatic_captions.items():
            available_subs.setdefault(lang, cap_info)

    if not available_subs or not (
            self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
        return None

    all_sub_langs = available_subs.keys()
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        # A list is used so that the order of languages will be the same as
        # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
        requested_langs = []
        for lang_re in self.params.get('subtitleslangs'):
            if lang_re == 'all':
                requested_langs.extend(all_sub_langs)
                continue
            discard = lang_re[0] == '-'
            if discard:
                lang_re = lang_re[1:]
            pattern = re.compile(lang_re + '$')
            current_langs = [lang for lang in all_sub_langs if pattern.match(lang)]
            if discard:
                # Remove every occurrence of each matched language
                requested_langs = [lang for lang in requested_langs if lang not in current_langs]
            else:
                requested_langs.extend(current_langs)
        requested_langs = orderedSet(requested_langs)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [next(iter(all_sub_langs))]
    if requested_langs:
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        if formats is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [f for f in formats if f['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # No stated preference matched; fall back to the last (best) format
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
2460
def __forced_printings(self, info_dict, filename, incomplete):
    """Handle the --force-* and --print options: write selected fields to stdout."""
    info_dict = info_dict.copy()
    if filename is not None:
        info_dict['filename'] = filename
    if info_dict.get('requested_formats') is not None:
        # For RTMP URLs, also include the playpath
        info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
    elif 'url' in info_dict:
        info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    def print_mandatory(field, actual_field=None):
        # Printed even for incomplete entries, unless the value is missing
        if actual_field is None:
            actual_field = field
        if self.params.get('force%s' % field, False) and (
                not incomplete or info_dict.get(actual_field) is not None):
            self.to_stdout(info_dict[actual_field])

    def print_optional(field):
        if self.params.get('force%s' % field, False) and info_dict.get(field) is not None:
            self.to_stdout(info_dict[field])

    if self.params.get('forceprint') or self.params.get('forcejson'):
        self.post_extract(info_dict)
    for tmpl in self.params.get('forceprint', []):
        # 'field' and 'field=' shorthands are expanded into output templates
        mobj = re.match(r'\w+(=?)$', tmpl)
        if mobj and mobj.group(1):
            tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
        elif mobj:
            tmpl = '%({})s'.format(tmpl)
        self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))

    print_mandatory('title')
    print_mandatory('id')
    print_mandatory('url', 'urls')
    print_optional('thumbnail')
    print_optional('description')
    print_optional('filename')
    if self.params.get('forceduration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    print_mandatory('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2505
def dl(self, name, info, subtitle=False, test=False):
    """Invoke a suitable downloader for ``info``, writing to ``name``.

    When ``test`` is true, a throw-away parameter set is used so that only
    a small part of the media is fetched and no .ytdl file is written.
    Returns whatever the downloader's ``download`` returns.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
    if not test:
        for hook in self._progress_hooks:
            downloader.add_progress_hook(hook)

    requested_urls = [f['url'] for f in info.get('requested_formats', [])]
    urls = '", "'.join(requested_urls or [info['url']])
    self.write_debug('Invoking downloader on "%s"' % urls)

    # Work on a deep copy so the downloader cannot mutate the caller's dict
    new_info = copy.deepcopy(self._copy_infodict(info))
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return downloader.download(name, new_info, subtitle)
2536
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Everything that happens to one video after format selection: forced
    printing, writing description/subtitles/thumbnails/info-json/annotations/
    internet shortcuts, the actual download (including merging of multiple
    requested formats), ffmpeg fixups, postprocessing, and recording into
    the download archive. Raises MaxDownloadsReached when --max-downloads
    is hit.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    # A non-None return value means the video is to be skipped (filters etc.)
    if self._match_entry(info_dict) is not None:
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    if self.params.get('simulate'):
        if self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)
        # Do nothing else if in simulate mode
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    # Write sidecar files. Each _write_* helper returns None on fatal error.
    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        # None (as opposed to False) signals a fatal write error
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    url_link = webloc_link = desktop_link = False
    if self.params.get('writelink', False):
        if sys.platform == "darwin":  # macOS.
            webloc_link = True
        elif sys.platform.startswith("linux"):
            desktop_link = True
        else:  # if sys.platform in ['win32', 'cygwin']:
            url_link = True
    if self.params.get('writeurllink', False):
        url_link = True
    if self.params.get('writewebloclink', False):
        webloc_link = True
    if self.params.get('writedesktoplink', False):
        desktop_link = True

    if url_link or webloc_link or desktop_link:
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return
        ascii_url = iri_to_uri(info_dict['webpage_url'])

    def _write_link_file(extension, template, newline, embed_filename):
        # Returns False on write failure, True otherwise
        linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
        if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            # NOTE(review): this condition looks inverted -- with the default
            # overwrites=True an existing shortcut is *skipped* rather than
            # rewritten; `not self.params.get('overwrites', True)` seems
            # intended. Confirm before changing.
            self.to_screen('[info] Internet shortcut is already present')
        else:
            try:
                self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                    template_vars = {'url': ascii_url}
                    if embed_filename:
                        template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                    linkfile.write(template % template_vars)
            except (OSError, IOError):
                self.report_error('Cannot write internet shortcut ' + linkfn)
                return False
        return True

    if url_link:
        if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
            return
    if webloc_link:
        if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
            return
    if desktop_link:
        if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
            return

    try:
        info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    must_record_download_archive = False
    if self.params.get('skip_download', False):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_file(*filepaths):
                # Return an already-downloaded file to reuse, or None after
                # deleting stale copies (when overwrites is enabled)
                ext = info_dict.get('ext')
                final_ext = self.params.get('final_ext', ext)
                existing_files = []
                for file in orderedSet(filepaths):
                    if final_ext != ext:
                        # A postprocessed (converted) copy also counts
                        converted = replace_extension(file, final_ext, ext)
                        if os.path.exists(encodeFilename(converted)):
                            existing_files.append(converted)
                    if os.path.exists(encodeFilename(file)):
                        existing_files.append(file)

                if not existing_files or self.params.get('overwrites', False):
                    for file in orderedSet(existing_files):
                        self.report_file_delete(file)
                        os.remove(encodeFilename(file))
                    return None

                info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                return existing_files[0]

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv')
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return '%s.%s' % (filename_wo_ext, ext)

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
                    # A single downloader (e.g. ffmpeg) can fetch all formats at once
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    # Download each format separately, then merge with ffmpeg
                    downloaded = []
                    merger = FFmpegMergerPP(self)
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        self.report_warning(
                            'You have requested merging of multiple formats but ffmpeg is not installed. '
                            'The formats won\'t be merged.')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                    fname = temp_filename
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            # Per-format intermediate file, e.g. name.f137.mp4
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success
                    if merger.available and not self.params.get('allow_unplayable_formats'):
                        info_dict['__postprocessors'].append(merger)
                        info_dict['__files_to_merge'] = downloaded
                        # Even if there were no downloads, it is being merged only now
                        info_dict['__real_download'] = True
                    else:
                        for file in downloaded:
                            files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

    if success and full_filename != '-':

        def fixup():
            # Queue ffmpeg-based fixups (or just warn) per the 'fixup' policy
            do_fixup = True
            fixup_policy = self.params.get('fixup')
            vid = info_dict['id']

            if fixup_policy in ('ignore', 'never'):
                return
            elif fixup_policy == 'warn':
                do_fixup = False
            elif fixup_policy != 'force':
                assert fixup_policy in ('detect_or_warn', None)
                if not info_dict.get('__real_download'):
                    do_fixup = False

            def ffmpeg_fixup(cndn, msg, cls):
                if not cndn:
                    return
                if not do_fixup:
                    self.report_warning(f'{vid}: {msg}')
                    return
                pp = cls(self)
                if pp.available:
                    info_dict['__postprocessors'].append(pp)
                else:
                    self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

            stretched_ratio = info_dict.get('stretched_ratio')
            ffmpeg_fixup(
                stretched_ratio not in (1, None),
                f'Non-uniform pixel ratio {stretched_ratio}',
                FFmpegFixupStretchedPP)

            ffmpeg_fixup(
                (info_dict.get('requested_formats') is None
                 and info_dict.get('container') == 'm4a_dash'
                 and info_dict.get('ext') == 'm4a'),
                'writing DASH m4a. Only some players support this container',
                FFmpegFixupM4aPP)

            downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
            downloader = downloader.__name__ if downloader else None
            ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
                         'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
            ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
            ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)

        fixup()
        try:
            info_dict = self.post_process(dl_filename, info_dict, files_to_move)
        except PostProcessingError as err:
            self.report_error('Postprocessing: %s' % str(err))
            return
        try:
            for ph in self._post_hooks:
                ph(info_dict['filepath'])
        except Exception as err:
            self.report_error('post hooks: %s' % str(err))
            return
        must_record_download_archive = True

    if must_record_download_archive or self.params.get('force_write_download_archive', False):
        self.record_download_archive(info_dict)
    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
2906
def download(self, url_list):
    """Download a given list of URLs."""
    outtmpl = self.outtmpl_dict['default']
    # A fixed (non-template) output filename can only ever hold one download
    if (len(url_list) > 1
            and outtmpl != '-'
            and '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloads reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
            raise
        else:
            if not self.params.get('dump_single_json', False):
                continue
            self.post_extract(res)
            self.to_stdout(json.dumps(self.sanitize_info(res)))

    return self._download_retcode
2938
def download_with_info_file(self, info_filename):
    """Download from a previously written --write-info-json file.

    Falls back to re-extracting from 'webpage_url' if the stored info
    can no longer be downloaded directly.
    """
    with contextlib.closing(fileinput.FileInput(
            [info_filename], mode='r',
            openhook=fileinput.hook_encoded('utf-8'))) as f:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
    try:
        self.process_ie_result(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
2955
@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    ''' Sanitize the infodict for converting to json.

    Recursively drops internal keys (and, when remove_private_keys is set,
    underscore-prefixed/private/empty fields) so the result can be safely
    serialized with json.dumps. Note: 'epoch' is added to info_dict in-place.
    '''
    if info_dict is None:
        return info_dict
    info_dict.setdefault('epoch', int(time.time()))
    remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
    # Must be a set of key names. The previous `['_type'],` (trailing comma)
    # was a tuple CONTAINING a list, so `'_type' not in keep_keys` was always
    # true and '_type' got stripped, breaking --load-info-json round-trips.
    keep_keys = {'_type'}  # Always keep this to facilitate load-info-json
    if remove_private_keys:
        remove_keys |= {
            'requested_formats', 'requested_subtitles', 'requested_entries',
            'filepath', 'entries', 'original_url', 'playlist_autonumber',
        }
        empty_values = (None, {}, [], set(), tuple())
        reject = lambda k, v: k not in keep_keys and (
            k.startswith('_') or k in remove_keys or v in empty_values)
    else:
        reject = lambda k, v: k in remove_keys
    filter_fn = lambda obj: (
        list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
        else obj if not isinstance(obj, dict)
        else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
    return filter_fn(info_dict)
2979
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    ''' Alias of sanitize_info for backward compatibility '''
    # Deprecated: kept so code written against youtube-dl's
    # filter_requested_info keeps working; prefer sanitize_info().
    return YoutubeDL.sanitize_info(info_dict, actually_filter)
2984
def run_pp(self, pp, infodict):
    """Run a single postprocessor and handle the files it asks to delete.

    Returns the (possibly replaced) infodict. Errors are re-raised unless
    'ignoreerrors' is exactly True.
    """
    files_to_delete = []
    infodict.setdefault('__files_to_move', {})
    try:
        files_to_delete, infodict = pp.run(infodict)
    except PostProcessingError as e:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is True:
            self.report_error(e)
            return infodict
        raise

    if not files_to_delete:
        return infodict

    if self.params.get('keepvideo', False):
        # -k: keep originals; just make sure they get moved with the result
        for f in files_to_delete:
            infodict['__files_to_move'].setdefault(f, '')
        return infodict

    for old_filename in set(files_to_delete):
        self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
        try:
            os.remove(encodeFilename(old_filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        if old_filename in infodict['__files_to_move']:
            del infodict['__files_to_move'][old_filename]
    return infodict
3013
@staticmethod
def post_extract(info_dict):
    """Merge any deferred '__post_extractor' data into the infodict,
    recursing into playlist/multi_video entries."""
    def _apply(d):
        if d.get('_type') in ('playlist', 'multi_video'):
            # Playlists carry no post-extractor of their own
            for entry in d.get('entries', {}):
                _apply(entry or {})
            return

        fetch_extra = d.get('__post_extractor') or (lambda: {})
        extra = fetch_extra().items()
        d.update(extra)
        d.pop('__post_extractor', None)

        # Keep the pristine original copy (if any) in sync too
        original = d.get('__original_infodict') or {}
        original.update(extra)
        original.pop('__post_extractor', None)

    _apply(info_dict or {})
3032
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the pre-processors of the given key on a copy of ie_info.

    Returns (processed_info, files_to_move).
    """
    info = dict(ie_info, __files_to_move=files_to_move or {})
    for pp in self._pps[key]:
        info = self.run_pp(pp, info)
    return info, info.pop('__files_to_move', None)
3039
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file."""
    info = dict(ie_info)
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}

    # Per-video postprocessors (e.g. the merger) run before the global ones
    pps = ie_info.get('__postprocessors', []) + self._pps['post_process']
    for pp in pps:
        info = self.run_pp(pp, info)
    # Move everything into its final directory before the after_move stage
    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    del info['__files_to_move']
    for pp in self._pps['after_move']:
        info = self.run_pp(pp, info)
    return info
3053
def _make_archive_id(self, info_dict):
    """Build the '<extractor> <id>' key used in the download archive,
    or None if the info is too incomplete."""
    video_id = info_dict.get('id')
    if not video_id:
        return
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return
        # Try to find matching extractor for the URL and take its ie_key
        extractor = next(
            (key for key, ie in self._ies.items() if ie.suitable(url)), None)
        if extractor is None:
            return
    return '%s %s' % (extractor.lower(), video_id)
3073
def in_download_archive(self, info_dict):
    """Whether this video is already recorded in the --download-archive file."""
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # Falsy id means the video information is incomplete
    return bool(archive_id) and archive_id in self.archive
3084
def record_download_archive(self, info_dict):
    """Append this video's archive id to the --download-archive file."""
    fn = self.params.get('download_archive')
    if fn is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    # locked_file tolerates concurrent yt-dlp instances sharing one archive
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
3094
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict."""
    vcodec, acodec = format.get('vcodec'), format.get('acodec')
    is_images = vcodec == 'none' and acodec == 'none'
    if vcodec == 'none' and acodec != 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    width, height = format.get('width'), format.get('height')
    if width and height:
        res = '%dx%d' % (width, height)
    elif height:
        res = '%sp' % height
    elif width:
        res = '%dx?' % width
    else:
        return 'images' if is_images else default
    return f'{res} images' if is_images else res
3113
def _format_note(self, fdict):
    # Build the short free-form description column used by the legacy
    # (non-table) --list-formats output. Pieces are appended with ' '/', '
    # separators, so the order of the checks below is significant.
    res = ''
    if fdict.get('ext') in ['f4f', 'f4m']:
        res += '(unsupported) '
    if fdict.get('language'):
        if res:
            res += ' '
        res += '[%s] ' % fdict['language']
    if fdict.get('format_note') is not None:
        res += fdict['format_note'] + ' '
    if fdict.get('tbr') is not None:
        res += '%4dk ' % fdict['tbr']
    if fdict.get('container') is not None:
        if res:
            res += ', '
        res += '%s container' % fdict['container']
    if (fdict.get('vcodec') is not None
            and fdict.get('vcodec') != 'none'):
        if res:
            res += ', '
        res += fdict['vcodec']
        # '@' glues the codec name to the video bitrate appended below
        if fdict.get('vbr') is not None:
            res += '@'
    elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        # Bitrates without a known codec still get a 'video@<vbr>' prefix
        res += 'video@'
    if fdict.get('vbr') is not None:
        res += '%4dk' % fdict['vbr']
    if fdict.get('fps') is not None:
        if res:
            res += ', '
        res += '%sfps' % fdict['fps']
    if fdict.get('acodec') is not None:
        if res:
            res += ', '
        if fdict['acodec'] == 'none':
            res += 'video only'
        else:
            res += '%-5s' % fdict['acodec']
    elif fdict.get('abr') is not None:
        if res:
            res += ', '
        res += 'audio'
    if fdict.get('abr') is not None:
        res += '@%3dk' % fdict['abr']
    if fdict.get('asr') is not None:
        res += ' (%5dHz)' % fdict['asr']
    if fdict.get('filesize') is not None:
        if res:
            res += ', '
        res += format_bytes(fdict['filesize'])
    elif fdict.get('filesize_approx') is not None:
        if res:
            res += ', '
        res += '~' + format_bytes(fdict['filesize_approx'])
    return res
3169
def list_formats(self, info_dict):
    """Print the table of available formats (--list-formats)."""
    formats = info_dict.get('formats', [info_dict])
    # The new table layout can be disabled via the 'list-formats' compat
    # option or listformats_table=False
    new_format = (
        'list-formats' not in self.params.get('compat_opts', [])
        and self.params.get('listformats_table', True) is not False)
    if new_format:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                format_field(f, 'fps', '%d'),
                format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
                '|',
                format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                format_field(f, 'tbr', '%4dk'),
                shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                '|',
                format_field(f, 'vcodec', default='unknown').replace('none', ''),
                format_field(f, 'vbr', '%4dk'),
                format_field(f, 'acodec', default='unknown').replace('none', ''),
                format_field(f, 'abr', '%3dk'),
                format_field(f, 'asr', '%5dHz'),
                ', '.join(filter(None, (
                    'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
                    format_field(f, 'language', '[%s]'),
                    format_field(f, 'format_note'),
                    format_field(f, 'container', ignore=(None, f.get('ext'))),
                ))),
            ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', '|', ' FILESIZE', ' TBR', 'PROTO',
                       '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
    else:
        # Legacy 4-column layout (youtube-dl compatible)
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)]
            for f in formats
            if f.get('preference') is None or f['preference'] >= -1000]
        header_line = ['format code', 'extension', 'resolution', 'note']

    self.to_screen(
        '[info] Available formats for %s:' % info_dict['id'])
    self.to_stdout(render_table(
        header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3217
def list_thumbnails(self, info_dict):
    """Print a table of the available thumbnails (--list-thumbnails)."""
    thumbnails = list(info_dict.get('thumbnails'))
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    rows = [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']]
            for t in thumbnails]
    self.to_stdout(render_table(['ID', 'width', 'height', 'URL'], rows))
3229
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of the available subtitles/captions (--list-subs)."""
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen(
        'Available %s for %s:' % (name, video_id))

    def _row(lang, formats):
        # Newest format first; collapse identical names into one entry
        exts, names = zip(*((fmt['ext'], fmt.get('name') or 'unknown') for fmt in reversed(formats)))
        if len(set(names)) == 1:
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    rows = [_row(lang, formats) for lang, formats in subtitles.items()]
    self.to_stdout(render_table(
        ['Language', 'Name', 'Formats'],
        rows,
        hideEmpty=True))
3247
def urlopen(self, req):
    """ Start an HTTP download """
    # Bare URL strings are wrapped into a sanitized Request first
    prepared = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(prepared, timeout=self._socket_timeout)
3253
def print_debug_header(self):
    """Write verbose debug information to the logger/screen.

    Reports encodings, yt-dlp version and variant, lazy-loader status,
    plugins, compat options, git HEAD (best effort), Python and platform
    details, external program versions, optional libraries and the proxy
    map. No-op unless the 'verbose' option is set.
    """
    if not self.params.get('verbose'):
        return

    def get_encoding(stream):
        # Report the stream's encoding, noting streams without ANSI support
        ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
        if not supports_terminal_sequences(stream):
            ret += ' (No ANSI)'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        get_encoding(self._screen_file), get_encoding(self._err_file),
        self.get_encoding())

    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        # Write the encoding line raw (encoding=None) since it describes
        # the very encodings that would be used for normal output
        write_string(f'[debug] {encoding_str}', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if plugin_extractors or plugin_postprocessors:
        write_debug('Plugins: %s' % [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
    if self.params.get('compat_opts'):
        write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
    try:
        sp = Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate_or_kill()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_debug('Git HEAD: %s' % out)
    except Exception:
        # Best effort only: a missing git binary or a non-git checkout is
        # fine. (A Python-2-only `sys.exc_clear()` call that used to live
        # here was dead code on Python 3 and has been removed.)
        pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    write_debug('Python version %s (%s %s) - %s' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .downloader.websocket import has_websockets
    from .postprocessor.embedthumbnail import has_mutagen
    from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE

    lib_str = ', '.join(sorted(filter(None, (
        compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
        has_websockets and 'websockets',
        has_mutagen and 'mutagen',
        SQLITE_AVAILABLE and 'sqlite',
        KEYRING_AVAILABLE and 'keyring',
    )))) or 'none'
    write_debug('Optional libraries: %s' % lib_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    # Not implemented
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
3356
def _setup_opener(self):
    """Build and install the urllib opener used for all HTTP(S) requests.

    Sets self._socket_timeout, self.cookiejar and self._opener from the
    'socket_timeout', 'cookiefile'/'cookiesfrombrowser' and 'proxy'
    options.
    """
    timeout_val = self.params.get('socket_timeout')
    # Default to a 20-second socket timeout when none is configured
    self._socket_timeout = 20 if timeout_val is None else float(timeout_val)

    opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
    if opts_proxy is not None:
        if opts_proxy == '':
            # An explicitly empty proxy option disables all proxies
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        # No proxy option given: fall back to the environment's proxies
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = PerRequestProxyHandler(proxies)

    # debuglevel=1 makes urllib print the raw HTTP traffic
    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
    file_handler.file_open = file_open

    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
3404
def encode(self, s):
    """Encode *s* to bytes using the configured output encoding.

    bytes input is returned unchanged. On encoding failure the
    UnicodeEncodeError is re-raised with a configuration hint appended
    to its reason.
    """
    if isinstance(s, bytes):
        # Nothing to do - input is already encoded
        return s
    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise
3414
def get_encoding(self):
    """Return the output encoding: the 'encoding' option if set,
    otherwise the system's preferred encoding."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
3420
3421 def _write_info_json(self, label, ie_result, infofn):
3422 ''' Write infojson and returns True = written, False = skip, None = error '''
3423 if not self.params.get('writeinfojson'):
3424 return False
3425 elif not infofn:
3426 self.write_debug(f'Skipping writing {label} infojson')
3427 return False
3428 elif not self._ensure_dir_exists(infofn):
3429 return None
3430 elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3431 self.to_screen(f'[info] {label.title()} metadata is already present')
3432 else:
3433 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3434 try:
3435 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3436 except (OSError, IOError):
3437 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3438 return None
3439 return True
3440
3441 def _write_description(self, label, ie_result, descfn):
3442 ''' Write description and returns True = written, False = skip, None = error '''
3443 if not self.params.get('writedescription'):
3444 return False
3445 elif not descfn:
3446 self.write_debug(f'Skipping writing {label} description')
3447 return False
3448 elif not self._ensure_dir_exists(descfn):
3449 return None
3450 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3451 self.to_screen(f'[info] {label.title()} description is already present')
3452 elif ie_result.get('description') is None:
3453 self.report_warning(f'There\'s no {label} description to write')
3454 return False
3455 else:
3456 try:
3457 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3458 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3459 descfile.write(ie_result['description'])
3460 except (OSError, IOError):
3461 self.report_error(f'Cannot write {label} description file {descfn}')
3462 return None
3463 return True
3464
def _write_subtitles(self, info_dict, filename):
    ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
    ret = []
    subtitles = info_dict.get('requested_subtitles')
    if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return ret

    sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
    if not sub_filename_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return ret
    for sub_lang, sub_info in subtitles.items():
        sub_format = sub_info['ext']
        # sub_filename: where the file is written now; sub_filename_final:
        # where it should end up after post-processing/moving
        sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
        sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
        if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
            # Keep the existing file but still record it in the results
            self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
        if sub_info.get('data') is not None:
            # Subtitle content was delivered inline by the extractor
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(sub_info['data'])
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
                continue
            except (OSError, IOError):
                # A local write failure aborts the whole operation
                self.report_error(f'Cannot write video subtitles file {sub_filename}')
                return None

        # No inline data: download the subtitle from its URL
        try:
            sub_copy = sub_info.copy()
            sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(sub_filename, sub_copy, subtitle=True)
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
        except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
            # Download failures are only warnings - continue with other languages
            self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
            continue
    return ret
3512
def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
    ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails, ret = [], []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
    # Only disambiguate filenames by thumbnail id when several will be written
    multiple = write_all and len(thumbnails) > 1

    if thumb_filename_base is None:
        thumb_filename_base = filename
    if thumbnails and not thumb_filename_base:
        self.write_debug(f'Skipping writing {label} thumbnail')
        return ret

    # Iterate in reverse so that, when not writing all thumbnails, the
    # loop stops after the last (presumably preferred) entry succeeds
    for t in thumbnails[::-1]:
        thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
        thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
        # thumb_filename: written now; thumb_filename_final: destination
        # after any later move/post-processing
        thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
        thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

        if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
            # Keep the existing file but still record it in the results
            ret.append((thumb_filename, thumb_filename_final))
            t['filepath'] = thumb_filename
            self.to_screen(f'[info] {thumb_display_id.title()} is already present')
        else:
            self.to_screen(f'[info] Downloading {thumb_display_id} ...')
            try:
                uf = self.urlopen(t['url'])
                self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
                ret.append((thumb_filename, thumb_filename_final))
                t['filepath'] = thumb_filename
            except network_exceptions as err:
                # Failure to fetch one thumbnail is not fatal
                self.report_warning(f'Unable to download {thumb_display_id}: {err}')
        if ret and not write_all:
            # One successful thumbnail is enough unless all were requested
            break
    return ret