yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import functools
  13 import io
  14 import itertools
  15 import json
  16 import locale
  17 import operator
  18 import os
  19 import platform
  20 import re
  21 import shutil
  22 import subprocess
  23 import sys
  24 import tempfile
  25 import time
  26 import tokenize
  27 import traceback
  28 import random
  29 import unicodedata
  30
  31 from enum import Enum
  32 from string import ascii_letters
  33
  34 from .compat import (
  35     compat_basestring,
  36     compat_get_terminal_size,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_pycrypto_AES,
  41     compat_shlex_quote,
  42     compat_str,
  43     compat_tokenize_tokenize,
  44     compat_urllib_error,
  45     compat_urllib_request,
  46     compat_urllib_request_DataHandler,
  47     windows_enable_vt_mode,
  48 )
  49 from .cookies import load_cookies
  50 from .utils import (
  51     age_restricted,
  52     args_to_str,
  53     ContentTooShortError,
  54     date_from_str,
  55     DateRange,
  56     DEFAULT_OUTTMPL,
  57     determine_ext,
  58     determine_protocol,
  59     DownloadCancelled,
  60     DownloadError,
  61     encode_compat_str,
  62     encodeFilename,
  63     EntryNotInPlaylist,
  64     error_to_compat_str,
  65     ExistingVideoReached,
  66     expand_path,
  67     ExtractorError,
  68     float_or_none,
  69     format_bytes,
  70     format_field,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     HEADRequest,
  74     int_or_none,
  75     iri_to_uri,
  76     ISO3166Utils,
  77     join_nonempty,
  78     LazyList,
  79     LINK_TEMPLATES,
  80     locked_file,
  81     make_dir,
  82     make_HTTPS_handler,
  83     MaxDownloadsReached,
  84     network_exceptions,
  85     number_of_digits,
  86     orderedSet,
  87     OUTTMPL_TYPES,
  88     PagedList,
  89     parse_filesize,
  90     PerRequestProxyHandler,
  91     platform_name,
  92     Popen,
  93     PostProcessingError,
  94     preferredencoding,
  95     prepend_extension,
  96     register_socks_protocols,
  97     RejectedVideoReached,
  98     render_table,
  99     replace_extension,
 100     SameFileError,
 101     sanitize_filename,
 102     sanitize_path,
 103     sanitize_url,
 104     sanitized_Request,
 105     std_headers,
 106     STR_FORMAT_RE_TMPL,
 107     STR_FORMAT_TYPES,
 108     str_or_none,
 109     strftime_or_none,
 110     subtitles_filename,
 111     supports_terminal_sequences,
 112     ThrottledDownload,
 113     to_high_limit_path,
 114     traverse_obj,
 115     try_get,
 116     UnavailableVideoError,
 117     url_basename,
 118     variadic,
 119     version_tuple,
 120     write_json_file,
 121     write_string,
 122     YoutubeDLCookieProcessor,
 123     YoutubeDLHandler,
 124     YoutubeDLRedirectHandler,
 125 )
 126 from .cache import Cache
 127 from .minicurses import format_text
 128 from .extractor import (
 129     gen_extractor_classes,
 130     get_info_extractor,
 131     _LAZY_LOADER,
 132     _PLUGIN_CLASSES as plugin_extractors
 133 )
 134 from .extractor.openload import PhantomJSwrapper
 135 from .downloader import (
 136     FFmpegFD,
 137     get_suitable_downloader,
 138     shorten_protocol_name
 139 )
 140 from .downloader.rtmp import rtmpdump_version
 141 from .postprocessor import (
 142     get_postprocessor,
 143     EmbedThumbnailPP,
 144     FFmpegFixupDurationPP,
 145     FFmpegFixupM3u8PP,
 146     FFmpegFixupM4aPP,
 147     FFmpegFixupStretchedPP,
 148     FFmpegFixupTimestampPP,
 149     FFmpegMergerPP,
 150     FFmpegPostProcessor,
 151     MoveFilesAfterDownloadPP,
 152     _PLUGIN_CLASSES as plugin_postprocessors
 153 )
 154 from .update import detect_variant
 155 from .version import __version__
 156
 157 if compat_os_name == 'nt':
 158     import ctypes
 159
 160
 161 class YoutubeDL(object):
 162     """YoutubeDL class.
 163
 164     YoutubeDL objects are the ones responsible for downloading the
 165     actual video file and writing it to disk if the user has requested
 166     it, among some other tasks. In most cases there should be one per
 167     program. As, given a video URL, the downloader doesn't know how to
 168     extract all the needed information, task that InfoExtractors do, it
 169     has to pass the URL to one of them.
 170
 171     For this, YoutubeDL objects have a method that allows
 172     InfoExtractors to be registered in a given order. When it is passed
 173     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 174     finds that reports being able to handle it. The InfoExtractor extracts
 175     all the information about the video or videos the URL refers to, and
 176     YoutubeDL processes the extracted information, possibly using a File
 177     Downloader to download the video.
 178
 179     YoutubeDL objects accept a lot of parameters. In order not to saturate
 180     the object constructor with arguments, it receives a dictionary of
 181     options instead. These options are available through the params
 182     attribute for the InfoExtractors to use. The YoutubeDL also
 183     registers itself as the downloader in charge for the InfoExtractors
 184     that are added to it, so this is a "mutual registration".
 185
 186     Available options:
 187
 188     username:          Username for authentication purposes.
 189     password:          Password for authentication purposes.
 190     videopassword:     Password for accessing a video.
 191     ap_mso:            Adobe Pass multiple-system operator identifier.
 192     ap_username:       Multiple-system operator account username.
 193     ap_password:       Multiple-system operator account password.
 194     usenetrc:          Use netrc for authentication instead.
 195     verbose:           Print additional info to stdout.
 196     quiet:             Do not print messages to stdout.
 197     no_warnings:       Do not print out anything for warnings.
 198     forceprint:        A list of templates to force print
 199     forceurl:          Force printing final URL. (Deprecated)
 200     forcetitle:        Force printing title. (Deprecated)
 201     forceid:           Force printing ID. (Deprecated)
 202     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 203     forcedescription:  Force printing description. (Deprecated)
 204     forcefilename:     Force printing final filename. (Deprecated)
 205     forceduration:     Force printing duration. (Deprecated)
 206     forcejson:         Force printing info_dict as JSON.
 207     dump_single_json:  Force printing the info_dict of the whole playlist
 208                        (or video) as a single JSON line.
 209     force_write_download_archive: Force writing download archive regardless
 210                        of 'skip_download' or 'simulate'.
 211     simulate:          Do not download the video files. If unset (or None),
 212                        simulate only if listsubtitles, listformats or list_thumbnails is used
 213     format:            Video format code. see "FORMAT SELECTION" for more details.
 214                        You can also pass a function. The function takes 'ctx' as
 215                        argument and returns the formats to download.
 216                        See "build_format_selector" for an implementation
 217     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 218     ignore_no_formats_error: Ignore "No video formats" error. Useful for
 219                        extracting metadata even if the video is not actually
 220                        available for download (experimental)
 221     format_sort:       A list of fields by which to sort the video formats.
 222                        See "Sorting Formats" for more details.
 223     format_sort_force: Force the given format_sort. see "Sorting Formats"
 224                        for more details.
 225     allow_multiple_video_streams:   Allow multiple video streams to be merged
 226                        into a single file
 227     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 228                        into a single file
 229     check_formats:     Whether to test if the formats are downloadable.
 230                        Can be True (check all), False (check none),
 231                        'selected' (check selected formats),
 232                        or None (check only if requested by extractor)
 233     paths:             Dictionary of output paths. The allowed keys are 'home'
 234                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 235     outtmpl:           Dictionary of templates for output names. Allowed keys
 236                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 237                        For compatibility with youtube-dl, a single string can also be used
 238     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 239     restrictfilenames: Do not allow "&" and spaces in file names
 240     trim_file_name:    Limit length of filename (extension excluded)
 241     windowsfilenames:  Force the filenames to be windows compatible
 242     ignoreerrors:      Do not stop on download/postprocessing errors.
 243                        Can be 'only_download' to ignore only download errors.
 244                        Default is 'only_download' for CLI, but False for API
 245     skip_playlist_after_errors: Number of allowed failures until the rest of
 246                        the playlist is skipped
 247     force_generic_extractor: Force downloader to use the generic extractor
 248     overwrites:        Overwrite all video and metadata files if True,
 249                        overwrite only non-video files if None
 250                        and don't overwrite any file if False
 251                        For compatibility with youtube-dl,
 252                        "nooverwrites" may also be used instead
 253     playliststart:     Playlist item to start at.
 254     playlistend:       Playlist item to end at.
 255     playlist_items:    Specific indices of playlist to download.
 256     playlistreverse:   Download playlist items in reverse order.
 257     playlistrandom:    Download playlist items in random order.
 258     matchtitle:        Download only matching titles.
 259     rejecttitle:       Reject downloads for matching titles.
 260     logger:            Log messages to a logging.Logger instance.
 261     logtostderr:       Log messages to stderr instead of stdout.
 262     consoletitle:      Display progress in console window's titlebar.
 263     writedescription:  Write the video description to a .description file
 264     writeinfojson:     Write the video metadata to a .info.json file
 265     clean_infojson:    Remove private fields from the infojson
 266     getcomments:       Extract video comments. This will not be written to disk
 267                        unless writeinfojson is also given
 268     writeannotations:  Write the video annotations to a .annotations.xml file
 269     writethumbnail:    Write the thumbnail image to a file
 270     allow_playlist_files: Whether to write playlists' description, infojson etc
 271                        also to disk when using the 'write*' options
 272     write_all_thumbnails:  Write all thumbnail formats to files
 273     writelink:         Write an internet shortcut file, depending on the
 274                        current platform (.url/.webloc/.desktop)
 275     writeurllink:      Write a Windows internet shortcut file (.url)
 276     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 277     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 278     writesubtitles:    Write the video subtitles to a file
 279     writeautomaticsub: Write the automatically generated subtitles to a file
 280     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 281                        Downloads all the subtitles of the video
 282                        (requires writesubtitles or writeautomaticsub)
 283     listsubtitles:     Lists all available subtitles for the video
 284     subtitlesformat:   The format code for subtitles
 285     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 286                        The list may contain "all" to refer to all the available
 287                        subtitles. The language can be prefixed with a "-" to
 288                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 289     keepvideo:         Keep the video file after post-processing
 290     daterange:         A DateRange object, download only if the upload_date is in the range.
 291     skip_download:     Skip the actual download of the video file
 292     cachedir:          Location of the cache files in the filesystem.
 293                        False to disable filesystem cache.
 294     noplaylist:        Download single video instead of a playlist if in doubt.
 295     age_limit:         An integer representing the user's age in years.
 296                        Unsuitable videos for the given age are skipped.
 297     min_views:         An integer representing the minimum view count the video
 298                        must have in order to not be skipped.
 299                        Videos without view count information are always
 300                        downloaded. None for no limit.
 301     max_views:         An integer representing the maximum view count.
 302                        Videos that are more popular than that are not
 303                        downloaded.
 304                        Videos without view count information are always
 305                        downloaded. None for no limit.
 306     download_archive:  File name of a file where all downloads are recorded.
 307                        Videos already present in the file are not downloaded
 308                        again.
 309     break_on_existing: Stop the download process after attempting to download a
 310                        file that is in the archive.
 311     break_on_reject:   Stop the download process when encountering a video that
 312                        has been filtered out.
 313     cookiefile:        File name where cookies should be read from and dumped to
 314     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 315                        name/path from where cookies are loaded.
 316                        Eg: ('chrome', ) or ('vivaldi', 'default')
 317     nocheckcertificate:Do not verify SSL certificates
 318     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 319                        At the moment, this is only supported by YouTube.
 320     proxy:             URL of the proxy server to use
 321     geo_verification_proxy:  URL of the proxy to use for IP address verification
 322                        on geo-restricted sites.
 323     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 324     bidi_workaround:   Work around buggy terminals without bidirectional text
 325                        support, using fribidi
 326     debug_printtraffic:Print out sent and received HTTP traffic
 327     include_ads:       Download ads as well
 328     default_search:    Prepend this string if an input url is not valid.
 329                        'auto' for elaborate guessing
 330     encoding:          Use this encoding instead of the system-specified.
 331     extract_flat:      Do not resolve URLs, return the immediate result.
 332                        Pass in 'in_playlist' to only show this behavior for
 333                        playlist items.
 334     postprocessors:    A list of dictionaries, each with an entry
 335                        * key:  The name of the postprocessor. See
 336                                yt_dlp/postprocessor/__init__.py for a list.
 337                        * when: When to run the postprocessor. Can be one of
 338                                pre_process|before_dl|post_process|after_move.
 339                                Assumed to be 'post_process' if not given
 340     post_hooks:        Deprecated - Register a custom postprocessor instead
 341                        A list of functions that get called as the final step
 342                        for each video file, after all postprocessors have been
 343                        called. The filename will be passed as the only argument.
 344     progress_hooks:    A list of functions that get called on download
 345                        progress, with a dictionary with the entries
 346                        * status: One of "downloading", "error", or "finished".
 347                                  Check this first and ignore unknown values.
 348                        * info_dict: The extracted info_dict
 349
 350                        If status is one of "downloading", or "finished", the
 351                        following properties may also be present:
 352                        * filename: The final filename (always present)
 353                        * tmpfilename: The filename we're currently writing to
 354                        * downloaded_bytes: Bytes on disk
 355                        * total_bytes: Size of the whole file, None if unknown
 356                        * total_bytes_estimate: Guess of the eventual file size,
 357                                                None if unavailable.
 358                        * elapsed: The number of seconds since download started.
 359                        * eta: The estimated time in seconds, None if unknown
 360                        * speed: The download speed in bytes/second, None if
 361                                 unknown
 362                        * fragment_index: The counter of the currently
 363                                          downloaded video fragment.
 364                        * fragment_count: The number of fragments (= individual
 365                                          files that will be merged)
 366
 367                        Progress hooks are guaranteed to be called at least once
 368                        (with status "finished") if the download is successful.
 369     postprocessor_hooks:  A list of functions that get called on postprocessing
 370                        progress, with a dictionary with the entries
 371                        * status: One of "started", "processing", or "finished".
 372                                  Check this first and ignore unknown values.
 373                        * postprocessor: Name of the postprocessor
 374                        * info_dict: The extracted info_dict
 375
 376                        Postprocessor hooks are guaranteed to be called at least twice
 377                        (with status "started" and "finished") if the processing is successful.
 378     merge_output_format: Extension to use when merging formats.
 379     final_ext:         Expected final extension; used to detect when the file was
 380                        already downloaded and converted
 381     fixup:             Automatically correct known faults of the file.
 382                        One of:
 383                        - "never": do nothing
 384                        - "warn": only emit a warning
 385                        - "detect_or_warn": check whether we can do anything
 386                                            about it, warn otherwise (default)
 387     source_address:    Client-side IP address to bind to.
 388     call_home:         Boolean, true iff we are allowed to contact the
 389                        yt-dlp servers for debugging. (BROKEN)
 390     sleep_interval_requests: Number of seconds to sleep between requests
 391                        during extraction
 392     sleep_interval:    Number of seconds to sleep before each download when
 393                        used alone or a lower bound of a range for randomized
 394                        sleep before each download (minimum possible number
 395                        of seconds to sleep) when used along with
 396                        max_sleep_interval.
 397     max_sleep_interval:Upper bound of a range for randomized sleep before each
 398                        download (maximum possible number of seconds to sleep).
 399                        Must only be used along with sleep_interval.
 400                        Actual sleep time will be a random float from range
 401                        [sleep_interval; max_sleep_interval].
 402     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 403     listformats:       Print an overview of available video formats and exit.
 404     list_thumbnails:   Print a table of all thumbnails and exit.
 405     match_filter:      A function that gets called with the info_dict of
 406                        every video.
 407                        If it returns a message, the video is ignored.
 408                        If it returns None, the video is downloaded.
 409                        match_filter_func in utils.py is one example for this.
 410     no_color:          Do not emit color codes in output.
 411     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 412                        HTTP header
 413     geo_bypass_country:
 414                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 415                        explicit geographic restriction bypassing via faking
 416                        X-Forwarded-For HTTP header
 417     geo_bypass_ip_block:
 418                        IP range in CIDR notation that will be used similarly to
 419                        geo_bypass_country
 420
 421     The following options determine which downloader is picked:
 422     external_downloader: A dictionary of protocol keys and the executable of the
 423                        external downloader to use for it. The allowed protocols
 424                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 425                        Set the value to 'native' to use the native downloader
 426     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 427                        or {'m3u8': 'ffmpeg'} instead.
 428                        Use the native HLS downloader instead of ffmpeg/avconv
 429                        if True, otherwise use ffmpeg/avconv if False, otherwise
 430                        use downloader suggested by extractor if None.
 431     compat_opts:       Compatibility options. See "Differences in default behavior".
 432                        The following options do not work when used through the API:
 433                        filename, abort-on-error, multistreams, no-live-chat, format-sort,
 434                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 435                        Refer to __init__.py for their implementation
 436     progress_template: Dictionary of templates for progress outputs.
 437                        Allowed keys are 'download', 'postprocess',
 438                        'download-title' (console title) and 'postprocess-title'.
 439                        The template is mapped on a dictionary with keys 'progress' and 'info'
 440
 441     The following parameters are not used by YoutubeDL itself, they are used by
 442     the downloader (see yt_dlp/downloader/common.py):
 443     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 444     max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
 445     noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 446     external_downloader_args, concurrent_fragment_downloads.
 447
 448     The following options are used by the post processors:
 449     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 450                        otherwise prefer ffmpeg. (avconv support is deprecated)
 451     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 452                        to the binary or its containing directory.
 453     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 454                        and a list of additional command-line arguments for the
 455                        postprocessor/executable. The dict can also have "PP+EXE" keys
 456                        which are used when the given exe is used by the given PP.
 457                        Use 'default' as the name for arguments to be passed to all PP
 458                        For compatibility with youtube-dl, a single list of args
 459                        can also be used
 460
 461     The following options are used by the extractors:
 462     extractor_retries: Number of times to retry for known errors
 463     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 464     hls_split_discontinuity: Split HLS playlists to different formats at
 465                        discontinuities such as ad breaks (default: False)
 466     extractor_args:    A dictionary of arguments to be passed to the extractors.
 467                        See "EXTRACTOR ARGUMENTS" for details.
 468                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 469     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 470                        If True (default), DASH manifests and related
 471                        data will be downloaded and processed by extractor.
 472                        You can reduce network I/O by disabling it if you don't
 473                        care about DASH. (only for youtube)
 474     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 475                        If True (default), HLS manifests and related
 476                        data will be downloaded and processed by extractor.
 477                        You can reduce network I/O by disabling it if you don't
 478                        care about HLS. (only for youtube)
 479     """
 480
 481     _NUMERIC_FIELDS = set((  # names of info fields grouped as numeric; the consumers are outside this view
 482         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 483         'timestamp', 'release_timestamp',
 484         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 485         'average_rating', 'comment_count', 'age_limit',
 486         'start_time', 'end_time',
 487         'chapter_number', 'season_number', 'episode_number',
 488         'track_number', 'disc_number', 'release_year',
 489     ))
 490
 491     _format_selection_exts = {  # file extensions grouped by kind of media
 492         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 493         'video': {'mp4', 'flv', 'webm', '3gp'},
 494         'storyboards': {'mhtml'},
 495     }
 496
 497     params = None  # class-level default; __init__ stores the options dict here
 498     _ies = {}  # class-level default; __init__ rebinds a fresh dict on each instance
 499     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}  # class-level default; __init__ rebinds fresh per-instance lists
 500     _printed_messages = set()  # class-level default; __init__ rebinds a fresh set on each instance
 501     _first_webpage_request = True  # __init__ sets this to True again on each instance
 502     _download_retcode = None  # __init__ sets this to 0
 503     _num_downloads = None  # __init__ sets this to 0
 504     _playlist_level = 0  # NOTE(review): not assigned in the visible part of __init__ - confirm where it is updated
 505     _playlist_urls = set()  # NOTE(review): mutable class-level set, not rebound in the visible part of __init__ - confirm it is not unintentionally shared between instances
 506     _screen_file = None  # __init__ picks sys.stdout or sys.stderr based on params['logtostderr']
 507
 508     def __init__(self, params=None, auto_init=True):
 509         """Create a YoutubeDL object with the given options.
 510         @param auto_init    Whether to load the default extractors and print header (if verbose).
 511                             Set to 'no_verbose_header' to not print the header
 512         """
 513         if params is None:
 514             params = {}
 515         self._ies = {}
 516         self._ies_instances = {}
 517         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 518         self._printed_messages = set()
 519         self._first_webpage_request = True
 520         self._post_hooks = []
 521         self._progress_hooks = []
 522         self._postprocessor_hooks = []
 523         self._download_retcode = 0
 524         self._num_downloads = 0
 525         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 526         self._err_file = sys.stderr
 527         self.params = params
 528         self.cache = Cache(self)
 529
 530         windows_enable_vt_mode()
 531         self._allow_colors = {
 532             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 533             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 534         }
 535
 536         if sys.version_info < (3, 6):
 537             self.report_warning(
 538                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 539
 540         if self.params.get('allow_unplayable_formats'):
 541             self.report_warning(
 542                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 543                 'This is a developer option intended for debugging. \n'
 544                 '         If you experience any issues while using this option, '
 545                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 546
 547         def check_deprecated(param, option, suggestion):
 548             if self.params.get(param) is not None:
 549                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 550                 return True
 551             return False
 552
 553         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 554             if self.params.get('geo_verification_proxy') is None:
 555                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 556
 557         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 558         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 559         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 560
 561         for msg in self.params.get('_warnings', []):
 562             self.report_warning(msg)
 563
 564         if 'list-formats' in self.params.get('compat_opts', []):
 565             self.params['listformats_table'] = False
 566
 567         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 568             # nooverwrites was unnecessarily changed to overwrites
 569             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 570             # This ensures compatibility with both keys
 571             self.params['overwrites'] = not self.params['nooverwrites']
 572         elif self.params.get('overwrites') is None:
 573             self.params.pop('overwrites', None)
 574         else:
 575             self.params['nooverwrites'] = not self.params['overwrites']
 576
 577         if params.get('bidi_workaround', False):
 578             try:
 579                 import pty
 580                 master, slave = pty.openpty()
 581                 width = compat_get_terminal_size().columns
 582                 if width is None:
 583                     width_args = []
 584                 else:
 585                     width_args = ['-w', str(width)]
 586                 sp_kwargs = dict(
 587                     stdin=subprocess.PIPE,
 588                     stdout=slave,
 589                     stderr=self._err_file)
 590                 try:
 591                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 592                 except OSError:
 593                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 594                 self._output_channel = os.fdopen(master, 'rb')
 595             except OSError as ose:
 596                 if ose.errno == errno.ENOENT:
 597                     self.report_warning(
 598                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 599                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 600                 else:
 601                     raise
 602
 603         if (sys.platform != 'win32'
 604                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 605                 and not params.get('restrictfilenames', False)):
 606             # Unicode filesystem API will throw errors (#1474, #13027)
 607             self.report_warning(
 608                 'Assuming --restrict-filenames since file system encoding '
 609                 'cannot encode all characters. '
 610                 'Set the LC_ALL environment variable to fix this.')
 611             self.params['restrictfilenames'] = True
 612
 613         self.outtmpl_dict = self.parse_outtmpl()
 614
 615         # Creating format selector here allows us to catch syntax errors before the extraction
 616         self.format_selector = (
 617             None if self.params.get('format') is None
 618             else self.params['format'] if callable(self.params['format'])
 619             else self.build_format_selector(self.params['format']))
 620
 621         self._setup_opener()
 622
 623         if auto_init:
 624             if auto_init != 'no_verbose_header':
 625                 self.print_debug_header()
 626             self.add_default_info_extractors()
 627
 628         for pp_def_raw in self.params.get('postprocessors', []):
 629             pp_def = dict(pp_def_raw)
 630             when = pp_def.pop('when', 'post_process')
 631             pp_class = get_postprocessor(pp_def.pop('key'))
 632             pp = pp_class(self, **compat_kwargs(pp_def))
 633             self.add_post_processor(pp, when=when)
 634
 635         hooks = {
 636             'post_hooks': self.add_post_hook,
 637             'progress_hooks': self.add_progress_hook,
 638             'postprocessor_hooks': self.add_postprocessor_hook,
 639         }
 640         for opt, fn in hooks.items():
 641             for ph in self.params.get(opt, []):
 642                 fn(ph)
 643
 644         register_socks_protocols()
 645
 646         def preload_download_archive(fn):
 647             """Preload the archive, if any is specified"""
 648             if fn is None:
 649                 return False
 650             self.write_debug(f'Loading archive file {fn!r}')
 651             try:
 652                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 653                     for line in archive_file:
 654                         self.archive.add(line.strip())
 655             except IOError as ioe:
 656                 if ioe.errno != errno.ENOENT:
 657                     raise
 658                 return False
 659             return True
 660
 661         self.archive = set()
 662         preload_download_archive(self.params.get('download_archive'))
 663
 664     def warn_if_short_id(self, argv):
 665         # short YouTube ID starting with dash?
 666         idxs = [
 667             i for i, a in enumerate(argv)
 668             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 669         if idxs:
 670             correct_argv = (
 671                 ['yt-dlp']
 672                 + [a for i, a in enumerate(argv) if i not in idxs]
 673                 + ['--'] + [argv[i] for i in idxs]
 674             )
 675             self.report_warning(
 676                 'Long argument string detected. '
 677                 'Use -- to separate parameters and URLs, like this:\n%s' %
 678                 args_to_str(correct_argv))
 679
 680     def add_info_extractor(self, ie):
 681         """Add an InfoExtractor object to the end of the list."""
 682         ie_key = ie.ie_key()
 683         self._ies[ie_key] = ie
 684         if not isinstance(ie, type):
 685             self._ies_instances[ie_key] = ie
 686             ie.set_downloader(self)
 687
 688     def _get_info_extractor_class(self, ie_key):
 689         ie = self._ies.get(ie_key)
 690         if ie is None:
 691             ie = get_info_extractor(ie_key)
 692             self.add_info_extractor(ie)
 693         return ie
 694
 695     def get_info_extractor(self, ie_key):
 696         """
 697         Get an instance of an IE with name ie_key, it will try to get one from
 698         the _ies list, if there's no instance it will create a new one and add
 699         it to the extractor list.
 700         """
 701         ie = self._ies_instances.get(ie_key)
 702         if ie is None:
 703             ie = get_info_extractor(ie_key)()
 704             self.add_info_extractor(ie)
 705         return ie
 706
 707     def add_default_info_extractors(self):
 708         """
 709         Add the InfoExtractors returned by gen_extractors to the end of the list
 710         """
 711         for ie in gen_extractor_classes():
 712             self.add_info_extractor(ie)
 713
 714     def add_post_processor(self, pp, when='post_process'):
 715         """Add a PostProcessor object to the end of the chain."""
 716         self._pps[when].append(pp)
 717         pp.set_downloader(self)
 718
 719     def add_post_hook(self, ph):
 720         """Add the post hook"""
 721         self._post_hooks.append(ph)
 722
 723     def add_progress_hook(self, ph):
 724         """Add the download progress hook"""
 725         self._progress_hooks.append(ph)
 726
 727     def add_postprocessor_hook(self, ph):
 728         """Add the postprocessing progress hook"""
 729         self._postprocessor_hooks.append(ph)
 730
 731     def _bidi_workaround(self, message):
 732         if not hasattr(self, '_output_channel'):
 733             return message
 734
 735         assert hasattr(self, '_output_process')
 736         assert isinstance(message, compat_str)
 737         line_count = message.count('\n') + 1
 738         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 739         self._output_process.stdin.flush()
 740         res = ''.join(self._output_channel.readline().decode('utf-8')
 741                       for _ in range(line_count))
 742         return res[:-len('\n')]
 743
 744     def _write_string(self, message, out=None, only_once=False):
 745         if only_once:
 746             if message in self._printed_messages:
 747                 return
 748             self._printed_messages.add(message)
 749         write_string(message, out=out, encoding=self.params.get('encoding'))
 750
 751     def to_stdout(self, message, skip_eol=False, quiet=False):
 752         """Print message to stdout"""
 753         if self.params.get('logger'):
 754             self.params['logger'].debug(message)
 755         elif not quiet or self.params.get('verbose'):
 756             self._write_string(
 757                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 758                 self._err_file if quiet else self._screen_file)
 759
 760     def to_stderr(self, message, only_once=False):
 761         """Print message to stderr"""
 762         assert isinstance(message, compat_str)
 763         if self.params.get('logger'):
 764             self.params['logger'].error(message)
 765         else:
 766             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 767
 768     def to_console_title(self, message):
 769         if not self.params.get('consoletitle', False):
 770             return
 771         if compat_os_name == 'nt':
 772             if ctypes.windll.kernel32.GetConsoleWindow():
 773                 # c_wchar_p() might not be necessary if `message` is
 774                 # already of type unicode()
 775                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 776         elif 'TERM' in os.environ:
 777             self._write_string('\033]0;%s\007' % message, self._screen_file)
 778
 779     def save_console_title(self):
 780         if not self.params.get('consoletitle', False):
 781             return
 782         if self.params.get('simulate'):
 783             return
 784         if compat_os_name != 'nt' and 'TERM' in os.environ:
 785             # Save the title on stack
 786             self._write_string('\033[22;0t', self._screen_file)
 787
 788     def restore_console_title(self):
 789         if not self.params.get('consoletitle', False):
 790             return
 791         if self.params.get('simulate'):
 792             return
 793         if compat_os_name != 'nt' and 'TERM' in os.environ:
 794             # Restore the title from stack
 795             self._write_string('\033[23;0t', self._screen_file)
 796
 797     def __enter__(self):
 798         self.save_console_title()
 799         return self
 800
 801     def __exit__(self, *args):
 802         self.restore_console_title()
 803
 804         if self.params.get('cookiefile') is not None:
 805             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 806
 807     def trouble(self, message=None, tb=None):
 808         """Determine action to take when a download problem appears.
 809
 810         Depending on if the downloader has been configured to ignore
 811         download errors or not, this method may throw an exception or
 812         not when errors are found, after printing the message.
 813
 814         tb, if given, is additional traceback information.
 815         """
 816         if message is not None:
 817             self.to_stderr(message)
 818         if self.params.get('verbose'):
 819             if tb is None:
 820                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 821                     tb = ''
 822                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 823                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 824                     tb += encode_compat_str(traceback.format_exc())
 825                 else:
 826                     tb_data = traceback.format_list(traceback.extract_stack())
 827                     tb = ''.join(tb_data)
 828             if tb:
 829                 self.to_stderr(tb)
 830         if not self.params.get('ignoreerrors'):
 831             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 832                 exc_info = sys.exc_info()[1].exc_info
 833             else:
 834                 exc_info = sys.exc_info()
 835             raise DownloadError(message, exc_info)
 836         self._download_retcode = 1
 837
 838     def to_screen(self, message, skip_eol=False):
 839         """Print message to stdout if not in quiet mode"""
 840         self.to_stdout(
 841             message, skip_eol, quiet=self.params.get('quiet', False))
 842
    class Styles(Enum):
        """Named text styles; each member's value is a colour name.

        Members that share a value are aliases under Enum semantics:
        WARNING resolves to HEADERS, and DELIM resolves to EMPHASIS.
        """
        HEADERS = 'yellow'
        EMPHASIS = 'blue'
        ID = 'green'
        DELIM = 'blue'  # same value as EMPHASIS, hence an alias of it
        ERROR = 'red'
        WARNING = 'yellow'  # same value as HEADERS, hence an alias of it
 850
 851     def __format_text(self, out, text, f, fallback=None, *, test_encoding=False):
 852         assert out in ('screen', 'err')
 853         if test_encoding:
 854             original_text = text
 855             handle = self._screen_file if out == 'screen' else self._err_file
 856             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 857             text = text.encode(encoding, 'ignore').decode(encoding)
 858             if fallback is not None and text != original_text:
 859                 text = fallback
 860         if isinstance(f, self.Styles):
 861             f = f._value_
 862         return format_text(text, f) if self._allow_colors[out] else text if fallback is None else fallback
 863
 864     def _format_screen(self, *args, **kwargs):
 865         return self.__format_text('screen', *args, **kwargs)
 866
 867     def _format_err(self, *args, **kwargs):
 868         return self.__format_text('err', *args, **kwargs)
 869
 870     def report_warning(self, message, only_once=False):
 871         '''
 872         Print the message to stderr, it will be prefixed with 'WARNING:'
 873         If stderr is a tty file the 'WARNING:' will be colored
 874         '''
 875         if self.params.get('logger') is not None:
 876             self.params['logger'].warning(message)
 877         else:
 878             if self.params.get('no_warnings'):
 879                 return
 880             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 881
 882     def report_error(self, message, tb=None):
 883         '''
 884         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 885         in red if stderr is a tty file.
 886         '''
 887         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb)
 888
 889     def write_debug(self, message, only_once=False):
 890         '''Log debug message or Print message to stderr'''
 891         if not self.params.get('verbose', False):
 892             return
 893         message = '[debug] %s' % message
 894         if self.params.get('logger'):
 895             self.params['logger'].debug(message)
 896         else:
 897             self.to_stderr(message, only_once)
 898
 899     def report_file_already_downloaded(self, file_name):
 900         """Report file has already been fully downloaded."""
 901         try:
 902             self.to_screen('[download] %s has already been downloaded' % file_name)
 903         except UnicodeEncodeError:
 904             self.to_screen('[download] The file has already been downloaded')
 905
 906     def report_file_delete(self, file_name):
 907         """Report that existing file will be deleted."""
 908         try:
 909             self.to_screen('Deleting existing file %s' % file_name)
 910         except UnicodeEncodeError:
 911             self.to_screen('Deleting existing file')
 912
 913     def raise_no_formats(self, info, forced=False):
 914         has_drm = info.get('__has_drm')
 915         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 916         expected = self.params.get('ignore_no_formats_error')
 917         if forced or not expected:
 918             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 919                                  expected=has_drm or expected)
 920         else:
 921             self.report_warning(msg)
 922
 923     def parse_outtmpl(self):
 924         outtmpl_dict = self.params.get('outtmpl', {})
 925         if not isinstance(outtmpl_dict, dict):
 926             outtmpl_dict = {'default': outtmpl_dict}
 927         # Remove spaces in the default template
 928         if self.params.get('restrictfilenames'):
 929             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 930         else:
 931             sanitize = lambda x: x
 932         outtmpl_dict.update({
 933             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 934             if outtmpl_dict.get(k) is None})
 935         for key, val in outtmpl_dict.items():
 936             if isinstance(val, bytes):
 937                 self.report_warning(
 938                     'Parameter outtmpl is bytes, but should be a unicode string. '
 939                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 940         return outtmpl_dict
 941
 942     def get_output_path(self, dir_type='', filename=None):
 943         paths = self.params.get('paths', {})
 944         assert isinstance(paths, dict)
 945         path = os.path.join(
 946             expand_path(paths.get('home', '').strip()),
 947             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 948             filename or '')
 949
 950         # Temporary fix for #4787
 951         # 'Treat' all problem characters by passing filename through preferredencoding
 952         # to workaround encoding issues with subprocess on python2 @ Windows
 953         if sys.version_info < (3, 0) and sys.platform == 'win32':
 954             path = encodeFilename(path, True).decode(preferredencoding())
 955         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 956
 957     @staticmethod
 958     def _outtmpl_expandpath(outtmpl):
 959         # expand_path translates '%%' into '%' and '$$' into '$'
 960         # correspondingly that is not what we want since we need to keep
 961         # '%%' intact for template dict substitution step. Working around
 962         # with boundary-alike separator hack.
 963         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 964         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 965
 966         # outtmpl should be expand_path'ed before template dict substitution
 967         # because meta fields may contain env variables we don't want to
 968         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 969         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 970         return expand_path(outtmpl).replace(sep, '')
 971
 972     @staticmethod
 973     def escape_outtmpl(outtmpl):
 974         ''' Escape any remaining strings like %s, %abc% etc. '''
 975         return re.sub(
 976             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 977             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 978             outtmpl)
 979
 980     @classmethod
 981     def validate_outtmpl(cls, outtmpl):
 982         ''' @return None or Exception object '''
 983         outtmpl = re.sub(
 984             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
 985             lambda mobj: f'{mobj.group(0)[:-1]}s',
 986             cls._outtmpl_expandpath(outtmpl))
 987         try:
 988             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
 989             return None
 990         except ValueError as err:
 991             return err
 992
 993     @staticmethod
 994     def _copy_infodict(info_dict):
 995         info_dict = dict(info_dict)
 996         for key in ('__original_infodict', '__postprocessors'):
 997             info_dict.pop(key, None)
 998         return info_dict
 999
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

        Every "%(key)fmt" field of the template is evaluated here and replaced
        by a field with a generated key, so that the result can be
        substituted with plain %-formatting.

        outtmpl   -- the output template
        info_dict -- the info dict; it only gets 'epoch' set (if missing),
                     everything else is done on a copy
        sanitize  -- optional callable(field_name, value) applied to values
                     of fields formatted as 'c', 's' or 'r'

        Returns a tuple (rewritten template, dict of values for it).
        """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        # From here on work on a copy (without the internal keys)
        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        # Values for the rewritten template, filled in by create_key()
        TMPL_DICT = {}
        # Matches the "%(key)fmt" fields of the template, including the
        # custom conversion types l, j, q, B and U
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        # Arithmetic supported inside a key; operands are converted to float
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the "." in the number pattern is unescaped, so it
        # matches any character, not only a decimal point - confirm intent
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # Grammar of a key, in order: optional "-" (negate), the field path,
        # maths, ">strftime format", ",alternate" and "|default"
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|)]+)?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Look up the dotted path k in info_dict; a leading "." is ignored
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Evaluate one parsed key (a groupdict of INTERNAL_FORMAT_RE)
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                # NOTE: this local shadows the imported `operator` module
                # inside get_value. None means an operator is expected next
                operator = None
                # Consume the maths string token by token, alternating
                # between an operator and its operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # The operand is a number, or else a field to look up
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        # e.g. the value or the operand is None
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                # "\," in the format stands for a literal comma
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        # Placeholder used when a field has no value and no default
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            # Lets json.dumps serialize sets and LazyLists as lists
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            # Replacement callback for EXTERNAL_FORMAT_RE: evaluates the
            # field, stores its value in TMPL_DICT and returns the rewritten
            # field. Matches without a key are returned unchanged
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            # Last component of the first field path; passed to sanitize()
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, default = None, na
            # Try the field and then each ",alternate" in turn until one
            # yields a value; the last default seen is kept
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            # Zero-padding for the backward-compatible counter fields
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            flags = outer_mobj.group('conversion') or ''
            # Same format with its conversion type replaced by "s"
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                # With "#" each element of the value is quoted separately
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                # The format is applied to the UTF-8 encoded value and the
                # result decoded again, ignoring undecodable bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                # First character of the value; a falsy value is formatted
                # as a string instead
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    # Not a number: fall back to the default, as a string
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            # The stored key has NUL inserted after each "%" and the original
            # format appended after a NUL
            # NOTE(review): presumably so that escape_outtmpl (whose pattern
            # refuses "%" followed by NUL) leaves the key alone - confirm
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1153
1154     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1155         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1156         return self.escape_outtmpl(outtmpl) % info_dict
1157
1158     def _prepare_filename(self, info_dict, tmpl_type='default'):
1159         try:
1160             sanitize = lambda k, v: sanitize_filename(
1161                 compat_str(v),
1162                 restricted=self.params.get('restrictfilenames'),
1163                 is_id=(k == 'id' or k.endswith('_id')))
1164             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1165             filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
1166
1167             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1168             if filename and force_ext is not None:
1169                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1170
1171             # https://github.com/blackjack4494/youtube-dlc/issues/85
1172             trim_file_name = self.params.get('trim_file_name', False)
1173             if trim_file_name:
1174                 fn_groups = filename.rsplit('.')
1175                 ext = fn_groups[-1]
1176                 sub_ext = ''
1177                 if len(fn_groups) > 2:
1178                     sub_ext = fn_groups[-2]
1179                 filename = join_nonempty(fn_groups[0][:trim_file_name], sub_ext, ext, delim='.')
1180
1181             return filename
1182         except ValueError as err:
1183             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1184             return None
1185
1186     def prepare_filename(self, info_dict, dir_type='', warn=False):
1187         """Generate the output filename."""
1188
1189         filename = self._prepare_filename(info_dict, dir_type or 'default')
1190         if not filename and dir_type not in ('', 'temp'):
1191             return ''
1192
1193         if warn:
1194             if not self.params.get('paths'):
1195                 pass
1196             elif filename == '-':
1197                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1198             elif os.path.isabs(filename):
1199                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1200         if filename == '-' or not filename:
1201             return filename
1202
1203         return self.get_output_path(dir_type, filename)
1204
1205     def _match_entry(self, info_dict, incomplete=False, silent=False):
1206         """ Returns None if the file should be downloaded """
1207
1208         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1209
1210         def check_filter():
1211             if 'title' in info_dict:
1212                 # This can happen when we're just evaluating the playlist
1213                 title = info_dict['title']
1214                 matchtitle = self.params.get('matchtitle', False)
1215                 if matchtitle:
1216                     if not re.search(matchtitle, title, re.IGNORECASE):
1217                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1218                 rejecttitle = self.params.get('rejecttitle', False)
1219                 if rejecttitle:
1220                     if re.search(rejecttitle, title, re.IGNORECASE):
1221                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1222             date = info_dict.get('upload_date')
1223             if date is not None:
1224                 dateRange = self.params.get('daterange', DateRange())
1225                 if date not in dateRange:
1226                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1227             view_count = info_dict.get('view_count')
1228             if view_count is not None:
1229                 min_views = self.params.get('min_views')
1230                 if min_views is not None and view_count < min_views:
1231                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1232                 max_views = self.params.get('max_views')
1233                 if max_views is not None and view_count > max_views:
1234                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1235             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1236                 return 'Skipping "%s" because it is age restricted' % video_title
1237
1238             match_filter = self.params.get('match_filter')
1239             if match_filter is not None:
1240                 try:
1241                     ret = match_filter(info_dict, incomplete=incomplete)
1242                 except TypeError:
1243                     # For backward compatibility
1244                     ret = None if incomplete else match_filter(info_dict)
1245                 if ret is not None:
1246                     return ret
1247             return None
1248
1249         if self.in_download_archive(info_dict):
1250             reason = '%s has already been recorded in the archive' % video_title
1251             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1252         else:
1253             reason = check_filter()
1254             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1255         if reason is not None:
1256             if not silent:
1257                 self.to_screen('[download] ' + reason)
1258             if self.params.get(break_opt, False):
1259                 raise break_err()
1260         return reason
1261
1262     @staticmethod
1263     def add_extra_info(info_dict, extra_info):
1264         '''Set the keys from extra_info in info dict if they are missing'''
1265         for key, value in extra_info.items():
1266             info_dict.setdefault(key, value)
1267
1268     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1269                      process=True, force_generic_extractor=False):
1270         """
1271         Return a list with a dictionary for each video extracted.
1272
1273         Arguments:
1274         url -- URL to extract
1275
1276         Keyword arguments:
1277         download -- whether to download videos during extraction
1278         ie_key -- extractor key hint
1279         extra_info -- dictionary containing the extra values to add to each result
1280         process -- whether to resolve all unresolved references (URLs, playlist items),
1281             must be True for download to work.
1282         force_generic_extractor -- force using the generic extractor
1283         """
1284
1285         if extra_info is None:
1286             extra_info = {}
1287
1288         if not ie_key and force_generic_extractor:
1289             ie_key = 'Generic'
1290
1291         if ie_key:
1292             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1293         else:
1294             ies = self._ies
1295
1296         for ie_key, ie in ies.items():
1297             if not ie.suitable(url):
1298                 continue
1299
1300             if not ie.working():
1301                 self.report_warning('The program functionality for this site has been marked as broken, '
1302                                     'and will probably not work.')
1303
1304             temp_id = ie.get_temp_id(url)
1305             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1306                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1307                                ie_key, temp_id))
1308                 break
1309             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1310         else:
1311             self.report_error('no suitable InfoExtractor for URL %s' % url)
1312
1313     def __handle_extraction_exceptions(func):
1314         @functools.wraps(func)
1315         def wrapper(self, *args, **kwargs):
1316             try:
1317                 return func(self, *args, **kwargs)
1318             except GeoRestrictedError as e:
1319                 msg = e.msg
1320                 if e.countries:
1321                     msg += '\nThis video is available in %s.' % ', '.join(
1322                         map(ISO3166Utils.short2full, e.countries))
1323                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1324                 self.report_error(msg)
1325             except ExtractorError as e:  # An error we somewhat expected
1326                 self.report_error(compat_str(e), e.format_traceback())
1327             except ThrottledDownload as e:
1328                 self.to_stderr('\r')
1329                 self.report_warning(f'{e}; Re-extracting data')
1330                 return wrapper(self, *args, **kwargs)
1331             except (DownloadCancelled, LazyList.IndexError):
1332                 raise
1333             except Exception as e:
1334                 if self.params.get('ignoreerrors'):
1335                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1336                 else:
1337                     raise
1338         return wrapper
1339
1340     @__handle_extraction_exceptions
1341     def __extract_info(self, url, ie, download, extra_info, process):
1342         ie_result = ie.extract(url)
1343         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1344             return
1345         if isinstance(ie_result, list):
1346             # Backwards compatibility: old IE result format
1347             ie_result = {
1348                 '_type': 'compat_list',
1349                 'entries': ie_result,
1350             }
1351         if extra_info.get('original_url'):
1352             ie_result.setdefault('original_url', extra_info['original_url'])
1353         self.add_default_extra_info(ie_result, ie, url)
1354         if process:
1355             return self.process_ie_result(ie_result, download, extra_info)
1356         else:
1357             return ie_result
1358
1359     def add_default_extra_info(self, ie_result, ie, url):
1360         if url is not None:
1361             self.add_extra_info(ie_result, {
1362                 'webpage_url': url,
1363                 'original_url': url,
1364                 'webpage_url_basename': url_basename(url),
1365             })
1366         if ie is not None:
1367             self.add_extra_info(ie_result, {
1368                 'extractor': ie.IE_NAME,
1369                 'extractor_key': ie.ie_key(),
1370             })
1371
1372     def process_ie_result(self, ie_result, download=True, extra_info=None):
1373         """
1374         Take the result of the ie(may be modified) and resolve all unresolved
1375         references (URLs, playlist items).
1376
1377         It will also download the videos if 'download'.
1378         Returns the resolved ie_result.
1379         """
1380         if extra_info is None:
1381             extra_info = {}
1382         result_type = ie_result.get('_type', 'video')
1383
1384         if result_type in ('url', 'url_transparent'):
1385             ie_result['url'] = sanitize_url(ie_result['url'])
1386             if ie_result.get('original_url'):
1387                 extra_info.setdefault('original_url', ie_result['original_url'])
1388
1389             extract_flat = self.params.get('extract_flat', False)
1390             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1391                     or extract_flat is True):
1392                 info_copy = ie_result.copy()
1393                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1394                 if ie and not ie_result.get('id'):
1395                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1396                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1397                 self.add_extra_info(info_copy, extra_info)
1398                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1399                 if self.params.get('force_write_download_archive', False):
1400                     self.record_download_archive(info_copy)
1401                 return ie_result
1402
1403         if result_type == 'video':
1404             self.add_extra_info(ie_result, extra_info)
1405             ie_result = self.process_video_result(ie_result, download=download)
1406             additional_urls = (ie_result or {}).get('additional_urls')
1407             if additional_urls:
1408                 # TODO: Improve MetadataParserPP to allow setting a list
1409                 if isinstance(additional_urls, compat_str):
1410                     additional_urls = [additional_urls]
1411                 self.to_screen(
1412                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1413                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1414                 ie_result['additional_entries'] = [
1415                     self.extract_info(
1416                         url, download, extra_info,
1417                         force_generic_extractor=self.params.get('force_generic_extractor'))
1418                     for url in additional_urls
1419                 ]
1420             return ie_result
1421         elif result_type == 'url':
1422             # We have to add extra_info to the results because it may be
1423             # contained in a playlist
1424             return self.extract_info(
1425                 ie_result['url'], download,
1426                 ie_key=ie_result.get('ie_key'),
1427                 extra_info=extra_info)
1428         elif result_type == 'url_transparent':
1429             # Use the information from the embedding page
1430             info = self.extract_info(
1431                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1432                 extra_info=extra_info, download=False, process=False)
1433
1434             # extract_info may return None when ignoreerrors is enabled and
1435             # extraction failed with an error, don't crash and return early
1436             # in this case
1437             if not info:
1438                 return info
1439
1440             force_properties = dict(
1441                 (k, v) for k, v in ie_result.items() if v is not None)
1442             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1443                 if f in force_properties:
1444                     del force_properties[f]
1445             new_result = info.copy()
1446             new_result.update(force_properties)
1447
1448             # Extracted info may not be a video result (i.e.
1449             # info.get('_type', 'video') != video) but rather an url or
1450             # url_transparent. In such cases outer metadata (from ie_result)
1451             # should be propagated to inner one (info). For this to happen
1452             # _type of info should be overridden with url_transparent. This
1453             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1454             if new_result.get('_type') == 'url':
1455                 new_result['_type'] = 'url_transparent'
1456
1457             return self.process_ie_result(
1458                 new_result, download=download, extra_info=extra_info)
1459         elif result_type in ('playlist', 'multi_video'):
1460             # Protect from infinite recursion due to recursively nested playlists
1461             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1462             webpage_url = ie_result['webpage_url']
1463             if webpage_url in self._playlist_urls:
1464                 self.to_screen(
1465                     '[download] Skipping already downloaded playlist: %s'
1466                     % ie_result.get('title') or ie_result.get('id'))
1467                 return
1468
1469             self._playlist_level += 1
1470             self._playlist_urls.add(webpage_url)
1471             self._sanitize_thumbnails(ie_result)
1472             try:
1473                 return self.__process_playlist(ie_result, download)
1474             finally:
1475                 self._playlist_level -= 1
1476                 if not self._playlist_level:
1477                     self._playlist_urls.clear()
1478         elif result_type == 'compat_list':
1479             self.report_warning(
1480                 'Extractor %s returned a compat_list result. '
1481                 'It needs to be updated.' % ie_result.get('extractor'))
1482
1483             def _fixup(r):
1484                 self.add_extra_info(r, {
1485                     'extractor': ie_result['extractor'],
1486                     'webpage_url': ie_result['webpage_url'],
1487                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1488                     'extractor_key': ie_result['extractor_key'],
1489                 })
1490                 return r
1491             ie_result['entries'] = [
1492                 self.process_ie_result(_fixup(r), download, extra_info)
1493                 for r in ie_result['entries']
1494             ]
1495             return ie_result
1496         else:
1497             raise Exception('Invalid result type: %s' % result_type)
1498
1499     def _ensure_dir_exists(self, path):
1500         return make_dir(path, self.report_error)
1501
1502     def __process_playlist(self, ie_result, download):
1503         # We process each entry in the playlist
1504         playlist = ie_result.get('title') or ie_result.get('id')
1505         self.to_screen('[download] Downloading playlist: %s' % playlist)
1506
1507         if 'entries' not in ie_result:
1508             raise EntryNotInPlaylist('There are no entries')
1509
1510         MissingEntry = object()
1511         incomplete_entries = bool(ie_result.get('requested_entries'))
1512         if incomplete_entries:
1513             def fill_missing_entries(entries, indices):
1514                 ret = [MissingEntry] * max(indices)
1515                 for i, entry in zip(indices, entries):
1516                     ret[i - 1] = entry
1517                 return ret
1518             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1519
1520         playlist_results = []
1521
1522         playliststart = self.params.get('playliststart', 1)
1523         playlistend = self.params.get('playlistend')
1524         # For backwards compatibility, interpret -1 as whole list
1525         if playlistend == -1:
1526             playlistend = None
1527
1528         playlistitems_str = self.params.get('playlist_items')
1529         playlistitems = None
1530         if playlistitems_str is not None:
1531             def iter_playlistitems(format):
1532                 for string_segment in format.split(','):
1533                     if '-' in string_segment:
1534                         start, end = string_segment.split('-')
1535                         for item in range(int(start), int(end) + 1):
1536                             yield int(item)
1537                     else:
1538                         yield int(string_segment)
1539             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1540
1541         ie_entries = ie_result['entries']
1542         msg = (
1543             'Downloading %d videos' if not isinstance(ie_entries, list)
1544             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1545
1546         if isinstance(ie_entries, list):
1547             def get_entry(i):
1548                 return ie_entries[i - 1]
1549         else:
1550             if not isinstance(ie_entries, (PagedList, LazyList)):
1551                 ie_entries = LazyList(ie_entries)
1552
1553             def get_entry(i):
1554                 return YoutubeDL.__handle_extraction_exceptions(
1555                     lambda self, i: ie_entries[i - 1]
1556                 )(self, i)
1557
1558         entries = []
1559         items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1560         for i in items:
1561             if i == 0:
1562                 continue
1563             if playlistitems is None and playlistend is not None and playlistend < i:
1564                 break
1565             entry = None
1566             try:
1567                 entry = get_entry(i)
1568                 if entry is MissingEntry:
1569                     raise EntryNotInPlaylist()
1570             except (IndexError, EntryNotInPlaylist):
1571                 if incomplete_entries:
1572                     raise EntryNotInPlaylist(f'Entry {i} cannot be found')
1573                 elif not playlistitems:
1574                     break
1575             entries.append(entry)
1576             try:
1577                 if entry is not None:
1578                     self._match_entry(entry, incomplete=True, silent=True)
1579             except (ExistingVideoReached, RejectedVideoReached):
1580                 break
1581         ie_result['entries'] = entries
1582
1583         # Save playlist_index before re-ordering
1584         entries = [
1585             ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1586             for i, entry in enumerate(entries, 1)
1587             if entry is not None]
1588         n_entries = len(entries)
1589
1590         if not playlistitems and (playliststart != 1 or playlistend):
1591             playlistitems = list(range(playliststart, playliststart + n_entries))
1592         ie_result['requested_entries'] = playlistitems
1593
1594         _infojson_written = False
1595         if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
1596             ie_copy = {
1597                 'playlist': playlist,
1598                 'playlist_id': ie_result.get('id'),
1599                 'playlist_title': ie_result.get('title'),
1600                 'playlist_uploader': ie_result.get('uploader'),
1601                 'playlist_uploader_id': ie_result.get('uploader_id'),
1602                 'playlist_index': 0,
1603                 'n_entries': n_entries,
1604             }
1605             ie_copy.update(dict(ie_result))
1606
1607             _infojson_written = self._write_info_json(
1608                 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1609             if _infojson_written is None:
1610                 return
1611             if self._write_description('playlist', ie_result,
1612                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1613                 return
1614             # TODO: This should be passed to ThumbnailsConvertor if necessary
1615             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1616
1617         if self.params.get('playlistreverse', False):
1618             entries = entries[::-1]
1619         if self.params.get('playlistrandom', False):
1620             random.shuffle(entries)
1621
1622         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1623
1624         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1625         failures = 0
1626         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1627         for i, entry_tuple in enumerate(entries, 1):
1628             playlist_index, entry = entry_tuple
1629             if 'playlist-index' in self.params.get('compat_opts', []):
1630                 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1631             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1632             # This __x_forwarded_for_ip thing is a bit ugly but requires
1633             # minimal changes
1634             if x_forwarded_for:
1635                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1636             extra = {
1637                 'n_entries': n_entries,
1638                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1639                 'playlist_index': playlist_index,
1640                 'playlist_autonumber': i,
1641                 'playlist': playlist,
1642                 'playlist_id': ie_result.get('id'),
1643                 'playlist_title': ie_result.get('title'),
1644                 'playlist_uploader': ie_result.get('uploader'),
1645                 'playlist_uploader_id': ie_result.get('uploader_id'),
1646                 'extractor': ie_result['extractor'],
1647                 'webpage_url': ie_result['webpage_url'],
1648                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1649                 'extractor_key': ie_result['extractor_key'],
1650             }
1651
1652             if self._match_entry(entry, incomplete=True) is not None:
1653                 continue
1654
1655             entry_result = self.__process_iterable_entry(entry, download, extra)
1656             if not entry_result:
1657                 failures += 1
1658             if failures >= max_failures:
1659                 self.report_error(
1660                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1661                 break
1662             playlist_results.append(entry_result)
1663         ie_result['entries'] = playlist_results
1664
1665         # Write the updated info to json
1666         if _infojson_written and self._write_info_json(
1667                 'updated playlist', ie_result,
1668                 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1669             return
1670         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1671         return ie_result
1672
1673     @__handle_extraction_exceptions
1674     def __process_iterable_entry(self, entry, download, extra_info):
1675         return self.process_ie_result(
1676             entry, download=download, extra_info=extra_info)
1677
1678     def _build_format_filter(self, filter_spec):
1679         " Returns a function to filter the formats according to the filter_spec "
1680
1681         OPERATORS = {
1682             '<': operator.lt,
1683             '<=': operator.le,
1684             '>': operator.gt,
1685             '>=': operator.ge,
1686             '=': operator.eq,
1687             '!=': operator.ne,
1688         }
1689         operator_rex = re.compile(r'''(?x)\s*
1690             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1691             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1692             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1693             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1694         m = operator_rex.fullmatch(filter_spec)
1695         if m:
1696             try:
1697                 comparison_value = int(m.group('value'))
1698             except ValueError:
1699                 comparison_value = parse_filesize(m.group('value'))
1700                 if comparison_value is None:
1701                     comparison_value = parse_filesize(m.group('value') + 'B')
1702                 if comparison_value is None:
1703                     raise ValueError(
1704                         'Invalid value %r in format specification %r' % (
1705                             m.group('value'), filter_spec))
1706             op = OPERATORS[m.group('op')]
1707
1708         if not m:
1709             STR_OPERATORS = {
1710                 '=': operator.eq,
1711                 '^=': lambda attr, value: attr.startswith(value),
1712                 '$=': lambda attr, value: attr.endswith(value),
1713                 '*=': lambda attr, value: value in attr,
1714             }
1715             str_operator_rex = re.compile(r'''(?x)\s*
1716                 (?P<key>[a-zA-Z0-9._-]+)\s*
1717                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1718                 (?P<value>[a-zA-Z0-9._-]+)\s*
1719                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1720             m = str_operator_rex.fullmatch(filter_spec)
1721             if m:
1722                 comparison_value = m.group('value')
1723                 str_op = STR_OPERATORS[m.group('op')]
1724                 if m.group('negation'):
1725                     op = lambda attr, value: not str_op(attr, value)
1726                 else:
1727                     op = str_op
1728
1729         if not m:
1730             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1731
1732         def _filter(f):
1733             actual_value = f.get(m.group('key'))
1734             if actual_value is None:
1735                 return m.group('none_inclusive')
1736             return op(actual_value, comparison_value)
1737         return _filter
1738
1739     def _check_formats(self, formats):
1740         for f in formats:
1741             self.to_screen('[info] Testing format %s' % f['format_id'])
1742             temp_file = tempfile.NamedTemporaryFile(
1743                 suffix='.tmp', delete=False,
1744                 dir=self.get_output_path('temp') or None)
1745             temp_file.close()
1746             try:
1747                 success, _ = self.dl(temp_file.name, f, test=True)
1748             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1749                 success = False
1750             finally:
1751                 if os.path.exists(temp_file.name):
1752                     try:
1753                         os.remove(temp_file.name)
1754                     except OSError:
1755                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1756             if success:
1757                 yield f
1758             else:
1759                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1760
1761     def _default_format_spec(self, info_dict, download=True):
1762
1763         def can_merge():
1764             merger = FFmpegMergerPP(self)
1765             return merger.available and merger.can_merge()
1766
1767         prefer_best = (
1768             not self.params.get('simulate')
1769             and download
1770             and (
1771                 not can_merge()
1772                 or info_dict.get('is_live', False)
1773                 or self.outtmpl_dict['default'] == '-'))
1774         compat = (
1775             prefer_best
1776             or self.params.get('allow_multiple_audio_streams', False)
1777             or 'format-spec' in self.params.get('compat_opts', []))
1778
1779         return (
1780             'best/bestvideo+bestaudio' if prefer_best
1781             else 'bestvideo*+bestaudio/best' if not compat
1782             else 'bestvideo+bestaudio/best')
1783
1784     def build_format_selector(self, format_spec):
1785         def syntax_error(note, start):
1786             message = (
1787                 'Invalid format specification: '
1788                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1789             return SyntaxError(message)
1790
1791         PICKFIRST = 'PICKFIRST'
1792         MERGE = 'MERGE'
1793         SINGLE = 'SINGLE'
1794         GROUP = 'GROUP'
1795         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1796
1797         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1798                                   'video': self.params.get('allow_multiple_video_streams', False)}
1799
1800         check_formats = self.params.get('check_formats') == 'selected'
1801
1802         def _parse_filter(tokens):
1803             filter_parts = []
1804             for type, string, start, _, _ in tokens:
1805                 if type == tokenize.OP and string == ']':
1806                     return ''.join(filter_parts)
1807                 else:
1808                     filter_parts.append(string)
1809
1810         def _remove_unused_ops(tokens):
1811             # Remove operators that we don't use and join them with the surrounding strings
1812             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1813             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1814             last_string, last_start, last_end, last_line = None, None, None, None
1815             for type, string, start, end, line in tokens:
1816                 if type == tokenize.OP and string == '[':
1817                     if last_string:
1818                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1819                         last_string = None
1820                     yield type, string, start, end, line
1821                     # everything inside brackets will be handled by _parse_filter
1822                     for type, string, start, end, line in tokens:
1823                         yield type, string, start, end, line
1824                         if type == tokenize.OP and string == ']':
1825                             break
1826                 elif type == tokenize.OP and string in ALLOWED_OPS:
1827                     if last_string:
1828                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1829                         last_string = None
1830                     yield type, string, start, end, line
1831                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1832                     if not last_string:
1833                         last_string = string
1834                         last_start = start
1835                         last_end = end
1836                     else:
1837                         last_string += string
1838             if last_string:
1839                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1840
1841         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1842             selectors = []
1843             current_selector = None
1844             for type, string, start, _, _ in tokens:
1845                 # ENCODING is only defined in python 3.x
1846                 if type == getattr(tokenize, 'ENCODING', None):
1847                     continue
1848                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1849                     current_selector = FormatSelector(SINGLE, string, [])
1850                 elif type == tokenize.OP:
1851                     if string == ')':
1852                         if not inside_group:
1853                             # ')' will be handled by the parentheses group
1854                             tokens.restore_last_token()
1855                         break
1856                     elif inside_merge and string in ['/', ',']:
1857                         tokens.restore_last_token()
1858                         break
1859                     elif inside_choice and string == ',':
1860                         tokens.restore_last_token()
1861                         break
1862                     elif string == ',':
1863                         if not current_selector:
1864                             raise syntax_error('"," must follow a format selector', start)
1865                         selectors.append(current_selector)
1866                         current_selector = None
1867                     elif string == '/':
1868                         if not current_selector:
1869                             raise syntax_error('"/" must follow a format selector', start)
1870                         first_choice = current_selector
1871                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1872                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1873                     elif string == '[':
1874                         if not current_selector:
1875                             current_selector = FormatSelector(SINGLE, 'best', [])
1876                         format_filter = _parse_filter(tokens)
1877                         current_selector.filters.append(format_filter)
1878                     elif string == '(':
1879                         if current_selector:
1880                             raise syntax_error('Unexpected "("', start)
1881                         group = _parse_format_selection(tokens, inside_group=True)
1882                         current_selector = FormatSelector(GROUP, group, [])
1883                     elif string == '+':
1884                         if not current_selector:
1885                             raise syntax_error('Unexpected "+"', start)
1886                         selector_1 = current_selector
1887                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1888                         if not selector_2:
1889                             raise syntax_error('Expected a selector', start)
1890                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1891                     else:
1892                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1893                 elif type == tokenize.ENDMARKER:
1894                     break
1895             if current_selector:
1896                 selectors.append(current_selector)
1897             return selectors
1898
1899         def _merge(formats_pair):
1900             format_1, format_2 = formats_pair
1901
1902             formats_info = []
1903             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1904             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1905
1906             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1907                 get_no_more = {'video': False, 'audio': False}
1908                 for (i, fmt_info) in enumerate(formats_info):
1909                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1910                         formats_info.pop(i)
1911                         continue
1912                     for aud_vid in ['audio', 'video']:
1913                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1914                             if get_no_more[aud_vid]:
1915                                 formats_info.pop(i)
1916                                 break
1917                             get_no_more[aud_vid] = True
1918
1919             if len(formats_info) == 1:
1920                 return formats_info[0]
1921
1922             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1923             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1924
1925             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1926             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1927
1928             output_ext = self.params.get('merge_output_format')
1929             if not output_ext:
1930                 if the_only_video:
1931                     output_ext = the_only_video['ext']
1932                 elif the_only_audio and not video_fmts:
1933                     output_ext = the_only_audio['ext']
1934                 else:
1935                     output_ext = 'mkv'
1936
1937             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
1938
1939             new_dict = {
1940                 'requested_formats': formats_info,
1941                 'format': '+'.join(filtered('format')),
1942                 'format_id': '+'.join(filtered('format_id')),
1943                 'ext': output_ext,
1944                 'protocol': '+'.join(map(determine_protocol, formats_info)),
1945                 'language': '+'.join(orderedSet(filtered('language'))) or None,
1946                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
1947                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
1948                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
1949             }
1950
1951             if the_only_video:
1952                 new_dict.update({
1953                     'width': the_only_video.get('width'),
1954                     'height': the_only_video.get('height'),
1955                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1956                     'fps': the_only_video.get('fps'),
1957                     'dynamic_range': the_only_video.get('dynamic_range'),
1958                     'vcodec': the_only_video.get('vcodec'),
1959                     'vbr': the_only_video.get('vbr'),
1960                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1961                 })
1962
1963             if the_only_audio:
1964                 new_dict.update({
1965                     'acodec': the_only_audio.get('acodec'),
1966                     'abr': the_only_audio.get('abr'),
1967                     'asr': the_only_audio.get('asr'),
1968                 })
1969
1970             return new_dict
1971
1972         def _check_formats(formats):
1973             if not check_formats:
1974                 yield from formats
1975                 return
1976             yield from self._check_formats(formats)
1977
        def _build_selector_function(selector):
            """Turn a parsed selector (or a list of them) into a callable.

            The returned callable takes a ctx dict (it reads ctx['formats'] and
            ctx['incomplete_formats']) and returns an iterable of the formats
            picked by the selector.
            """
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                # Comma separated selectors: emit the picks of each one in order
                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                # Alternatives: the first one that picks anything wins
                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                # Every pick of the first selector is merged with every pick of
                # the second one; each side gets its own deep copy of ctx
                def selector_function(ctx):
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    # All formats, in reversed order
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'][::-1])
                elif format_spec == 'mergeall':
                    # Fold all formats into one merged format, starting from the last one
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    # best/worst (b/w), optional video/audio (v/a) type,
                    # optional "*" modifier and optional ".<n>" index
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        # Formats with neither audio nor video never match best/worst
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Not a best/worst spec: either a known extension or a format id
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        # Reversed for "best" so that index 0 is the last format of the list
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            # Apply the selector's [...] filters to a private copy of ctx
            # before handing it to the selector built above
            def final_selector(ctx):
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
2080
2081         stream = io.BytesIO(format_spec.encode('utf-8'))
2082         try:
2083             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2084         except tokenize.TokenError:
2085             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2086
2087         class TokenIterator(object):
2088             def __init__(self, tokens):
2089                 self.tokens = tokens
2090                 self.counter = 0
2091
2092             def __iter__(self):
2093                 return self
2094
2095             def __next__(self):
2096                 if self.counter >= len(self.tokens):
2097                     raise StopIteration()
2098                 value = self.tokens[self.counter]
2099                 self.counter += 1
2100                 return value
2101
2102             next = __next__
2103
2104             def restore_last_token(self):
2105                 self.counter -= 1
2106
2107         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2108         return _build_selector_function(parsed_selector)
2109
2110     def _calc_headers(self, info_dict):
2111         res = std_headers.copy()
2112
2113         add_headers = info_dict.get('http_headers')
2114         if add_headers:
2115             res.update(add_headers)
2116
2117         cookies = self._calc_cookies(info_dict)
2118         if cookies:
2119             res['Cookie'] = cookies
2120
2121         if 'X-Forwarded-For' not in res:
2122             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2123             if x_forwarded_for_ip:
2124                 res['X-Forwarded-For'] = x_forwarded_for_ip
2125
2126         return res
2127
2128     def _calc_cookies(self, info_dict):
2129         pr = sanitized_Request(info_dict['url'])
2130         self.cookiejar.add_cookie_header(pr)
2131         return pr.get_header('Cookie')
2132
2133     def _sort_thumbnails(self, thumbnails):
2134         thumbnails.sort(key=lambda t: (
2135             t.get('preference') if t.get('preference') is not None else -1,
2136             t.get('width') if t.get('width') is not None else -1,
2137             t.get('height') if t.get('height') is not None else -1,
2138             t.get('id') if t.get('id') is not None else '',
2139             t.get('url')))
2140
2141     def _sanitize_thumbnails(self, info_dict):
2142         thumbnails = info_dict.get('thumbnails')
2143         if thumbnails is None:
2144             thumbnail = info_dict.get('thumbnail')
2145             if thumbnail:
2146                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2147         if not thumbnails:
2148             return
2149
2150         def check_thumbnails(thumbnails):
2151             for t in thumbnails:
2152                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2153                 try:
2154                     self.urlopen(HEADRequest(t['url']))
2155                 except network_exceptions as err:
2156                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2157                     continue
2158                 yield t
2159
2160         self._sort_thumbnails(thumbnails)
2161         for i, t in enumerate(thumbnails):
2162             if t.get('id') is None:
2163                 t['id'] = '%d' % i
2164             if t.get('width') and t.get('height'):
2165                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2166             t['url'] = sanitize_url(t['url'])
2167
2168         if self.params.get('check_formats') is True:
2169             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse()
2170         else:
2171             info_dict['thumbnails'] = thumbnails
2172
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single-video info_dict, select its formats and process them.

        The info_dict is completed in place (thumbnails, dates, live status,
        subtitles, per-format fields and HTTP headers), pre-processors are run
        and the format selector is applied. When download is true, every
        selected format is handed to process_info.

        Returns the info_dict updated with the last selected format, or None
        when only a listing (thumbnails/formats/subtitles) was requested.
        Raises ExtractorError if "id" or "title" is missing, or if no format
        matches the request and 'ignore_no_formats_error' is not set.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string value to a string, warning about the extractor error
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every non-numeric value among self._NUMERIC_FIELDS via int_or_none
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Prefer an explicit thumbnail; otherwise use the last of the sanitized list
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        # Derive the missing YYYYMMDD date fields from their timestamps
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Reconcile live_status with the is_live/was_live flags: the first flag
        # that is not explicitly False decides (only if it is truthy), and
        # 'not_live' is used when all flags are explicitly False
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and fill in missing extensions from the URL
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # Remember whether any format had DRM before such formats are filtered out
        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # Maps each format_id to the list of formats sharing it
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    format['format_id'] = '%s-%d' % (format_id, i)
                if format.get('ext') is None:
                    format['ext'] = determine_ext(format['url']).lower()
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

        # Fill in the derived per-format fields that the extractor did not provide
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            # NOTE(review): presumably duration is in seconds and tbr in
            # KBit/s, making this an estimate in bytes - confirm
            if (info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)

            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if self.params.get('check_formats') is True:
            formats = LazyList(self._check_formats(formats[::-1])).reverse()

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # The pre-processors may have modified the formats
        formats = info_dict.get('formats', [info_dict])

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        # A listing request ends processing here unless 'simulate' was set explicitly
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                # Each selected format is processed with its own shallow copy of info_dict
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the selected best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2453
2454     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2455         """Select the requested subtitles and their format"""
2456         available_subs = {}
2457         if normal_subtitles and self.params.get('writesubtitles'):
2458             available_subs.update(normal_subtitles)
2459         if automatic_captions and self.params.get('writeautomaticsub'):
2460             for lang, cap_info in automatic_captions.items():
2461                 if lang not in available_subs:
2462                     available_subs[lang] = cap_info
2463
2464         if (not self.params.get('writesubtitles') and not
2465                 self.params.get('writeautomaticsub') or not
2466                 available_subs):
2467             return None
2468
2469         all_sub_langs = available_subs.keys()
2470         if self.params.get('allsubtitles', False):
2471             requested_langs = all_sub_langs
2472         elif self.params.get('subtitleslangs', False):
2473             # A list is used so that the order of languages will be the same as
2474             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2475             requested_langs = []
2476             for lang_re in self.params.get('subtitleslangs'):
2477                 if lang_re == 'all':
2478                     requested_langs.extend(all_sub_langs)
2479                     continue
2480                 discard = lang_re[0] == '-'
2481                 if discard:
2482                     lang_re = lang_re[1:]
2483                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2484                 if discard:
2485                     for lang in current_langs:
2486                         while lang in requested_langs:
2487                             requested_langs.remove(lang)
2488                 else:
2489                     requested_langs.extend(current_langs)
2490             requested_langs = orderedSet(requested_langs)
2491         elif 'en' in available_subs:
2492             requested_langs = ['en']
2493         else:
2494             requested_langs = [list(all_sub_langs)[0]]
2495         if requested_langs:
2496             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2497
2498         formats_query = self.params.get('subtitlesformat', 'best')
2499         formats_preference = formats_query.split('/') if formats_query else []
2500         subs = {}
2501         for lang in requested_langs:
2502             formats = available_subs.get(lang)
2503             if formats is None:
2504                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2505                 continue
2506             for ext in formats_preference:
2507                 if ext == 'best':
2508                     f = formats[-1]
2509                     break
2510                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2511                 if matches:
2512                     f = matches[-1]
2513                     break
2514             else:
2515                 f = formats[-1]
2516                 self.report_warning(
2517                     'No subtitle format found matching "%s" for language %s, '
2518                     'using %s' % (formats_query, lang, f['ext']))
2519             subs[lang] = f
2520         return subs
2521
2522     def __forced_printings(self, info_dict, filename, incomplete):
2523         def print_mandatory(field, actual_field=None):
2524             if actual_field is None:
2525                 actual_field = field
2526             if (self.params.get('force%s' % field, False)
2527                     and (not incomplete or info_dict.get(actual_field) is not None)):
2528                 self.to_stdout(info_dict[actual_field])
2529
2530         def print_optional(field):
2531             if (self.params.get('force%s' % field, False)
2532                     and info_dict.get(field) is not None):
2533                 self.to_stdout(info_dict[field])
2534
2535         info_dict = info_dict.copy()
2536         if filename is not None:
2537             info_dict['filename'] = filename
2538         if info_dict.get('requested_formats') is not None:
2539             # For RTMP URLs, also include the playpath
2540             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2541         elif 'url' in info_dict:
2542             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2543
2544         if self.params.get('forceprint') or self.params.get('forcejson'):
2545             self.post_extract(info_dict)
2546         for tmpl in self.params.get('forceprint', []):
2547             mobj = re.match(r'\w+(=?)$', tmpl)
2548             if mobj and mobj.group(1):
2549                 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2550             elif mobj:
2551                 tmpl = '%({})s'.format(tmpl)
2552             self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2553
2554         print_mandatory('title')
2555         print_mandatory('id')
2556         print_mandatory('url', 'urls')
2557         print_optional('thumbnail')
2558         print_optional('description')
2559         print_optional('filename')
2560         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2561             self.to_stdout(formatSeconds(info_dict['duration']))
2562         print_mandatory('format')
2563
2564         if self.params.get('forcejson'):
2565             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2566
2567     def dl(self, name, info, subtitle=False, test=False):
2568         if not info.get('url'):
2569             self.raise_no_formats(info, True)
2570
2571         if test:
2572             verbose = self.params.get('verbose')
2573             params = {
2574                 'test': True,
2575                 'quiet': self.params.get('quiet') or not verbose,
2576                 'verbose': verbose,
2577                 'noprogress': not verbose,
2578                 'nopart': True,
2579                 'skip_unavailable_fragments': False,
2580                 'keep_fragments': False,
2581                 'overwrites': True,
2582                 '_no_ytdl_file': True,
2583             }
2584         else:
2585             params = self.params
2586         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2587         if not test:
2588             for ph in self._progress_hooks:
2589                 fd.add_progress_hook(ph)
2590             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2591             self.write_debug('Invoking downloader on "%s"' % urls)
2592
2593         new_info = copy.deepcopy(self._copy_infodict(info))
2594         if new_info.get('http_headers') is None:
2595             new_info['http_headers'] = self._calc_headers(new_info)
2596         return fd.download(name, new_info, subtitle)
2597
2598     def process_info(self, info_dict):
2599         """Process a single resolved IE result."""
2600
2601         assert info_dict.get('_type', 'video') == 'video'
2602
2603         max_downloads = self.params.get('max_downloads')
2604         if max_downloads is not None:
2605             if self._num_downloads >= int(max_downloads):
2606                 raise MaxDownloadsReached()
2607
2608         # TODO: backward compatibility, to be removed
2609         info_dict['fulltitle'] = info_dict['title']
2610
2611         if 'format' not in info_dict and 'ext' in info_dict:
2612             info_dict['format'] = info_dict['ext']
2613
2614         if self._match_entry(info_dict) is not None:
2615             return
2616
2617         self.post_extract(info_dict)
2618         self._num_downloads += 1
2619
2620         # info_dict['_filename'] needs to be set for backward compatibility
2621         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2622         temp_filename = self.prepare_filename(info_dict, 'temp')
2623         files_to_move = {}
2624
2625         # Forced printings
2626         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2627
2628         if self.params.get('simulate'):
2629             if self.params.get('force_write_download_archive', False):
2630                 self.record_download_archive(info_dict)
2631             # Do nothing else if in simulate mode
2632             return
2633
2634         if full_filename is None:
2635             return
2636         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2637             return
2638         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2639             return
2640
2641         if self._write_description('video', info_dict,
2642                                    self.prepare_filename(info_dict, 'description')) is None:
2643             return
2644
2645         sub_files = self._write_subtitles(info_dict, temp_filename)
2646         if sub_files is None:
2647             return
2648         files_to_move.update(dict(sub_files))
2649
2650         thumb_files = self._write_thumbnails(
2651             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2652         if thumb_files is None:
2653             return
2654         files_to_move.update(dict(thumb_files))
2655
2656         infofn = self.prepare_filename(info_dict, 'infojson')
2657         _infojson_written = self._write_info_json('video', info_dict, infofn)
2658         if _infojson_written:
2659             info_dict['infojson_filename'] = infofn
2660             # For backward compatability, even though it was a private field
2661             info_dict['__infojson_filename'] = infofn
2662         elif _infojson_written is None:
2663             return
2664
2665         # Note: Annotations are deprecated
2666         annofn = None
2667         if self.params.get('writeannotations', False):
2668             annofn = self.prepare_filename(info_dict, 'annotation')
2669         if annofn:
2670             if not self._ensure_dir_exists(encodeFilename(annofn)):
2671                 return
2672             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2673                 self.to_screen('[info] Video annotations are already present')
2674             elif not info_dict.get('annotations'):
2675                 self.report_warning('There are no annotations to write.')
2676             else:
2677                 try:
2678                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2679                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2680                         annofile.write(info_dict['annotations'])
2681                 except (KeyError, TypeError):
2682                     self.report_warning('There are no annotations to write.')
2683                 except (OSError, IOError):
2684                     self.report_error('Cannot write annotations file: ' + annofn)
2685                     return
2686
2687         # Write internet shortcut files
2688         def _write_link_file(link_type):
2689             if 'webpage_url' not in info_dict:
2690                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2691                 return False
2692             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2693             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2694                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2695                 return True
2696             try:
2697                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2698                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2699                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2700                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2701                     if link_type == 'desktop':
2702                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2703                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2704             except (OSError, IOError):
2705                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2706                 return False
2707             return True
2708
2709         write_links = {
2710             'url': self.params.get('writeurllink'),
2711             'webloc': self.params.get('writewebloclink'),
2712             'desktop': self.params.get('writedesktoplink'),
2713         }
2714         if self.params.get('writelink'):
2715             link_type = ('webloc' if sys.platform == 'darwin'
2716                          else 'desktop' if sys.platform.startswith('linux')
2717                          else 'url')
2718             write_links[link_type] = True
2719
2720         if any(should_write and not _write_link_file(link_type)
2721                for link_type, should_write in write_links.items()):
2722             return
2723
2724         try:
2725             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2726         except PostProcessingError as err:
2727             self.report_error('Preprocessing: %s' % str(err))
2728             return
2729
2730         must_record_download_archive = False
2731         if self.params.get('skip_download', False):
2732             info_dict['filepath'] = temp_filename
2733             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2734             info_dict['__files_to_move'] = files_to_move
2735             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2736         else:
2737             # Download
2738             info_dict.setdefault('__postprocessors', [])
2739             try:
2740
2741                 def existing_file(*filepaths):
2742                     ext = info_dict.get('ext')
2743                     final_ext = self.params.get('final_ext', ext)
2744                     existing_files = []
2745                     for file in orderedSet(filepaths):
2746                         if final_ext != ext:
2747                             converted = replace_extension(file, final_ext, ext)
2748                             if os.path.exists(encodeFilename(converted)):
2749                                 existing_files.append(converted)
2750                         if os.path.exists(encodeFilename(file)):
2751                             existing_files.append(file)
2752
2753                     if not existing_files or self.params.get('overwrites', False):
2754                         for file in orderedSet(existing_files):
2755                             self.report_file_delete(file)
2756                             os.remove(encodeFilename(file))
2757                         return None
2758
2759                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2760                     return existing_files[0]
2761
2762                 success = True
2763                 if info_dict.get('requested_formats') is not None:
2764
2765                     def compatible_formats(formats):
2766                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2767                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2768                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2769                         if len(video_formats) > 2 or len(audio_formats) > 2:
2770                             return False
2771
2772                         # Check extension
2773                         exts = set(format.get('ext') for format in formats)
2774                         COMPATIBLE_EXTS = (
2775                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2776                             set(('webm',)),
2777                         )
2778                         for ext_sets in COMPATIBLE_EXTS:
2779                             if ext_sets.issuperset(exts):
2780                                 return True
2781                         # TODO: Check acodec/vcodec
2782                         return False
2783
2784                     requested_formats = info_dict['requested_formats']
2785                     old_ext = info_dict['ext']
2786                     if self.params.get('merge_output_format') is None:
2787                         if not compatible_formats(requested_formats):
2788                             info_dict['ext'] = 'mkv'
2789                             self.report_warning(
2790                                 'Requested formats are incompatible for merge and will be merged into mkv')
2791                         if (info_dict['ext'] == 'webm'
2792                                 and info_dict.get('thumbnails')
2793                                 # check with type instead of pp_key, __name__, or isinstance
2794                                 # since we dont want any custom PPs to trigger this
2795                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2796                             info_dict['ext'] = 'mkv'
2797                             self.report_warning(
2798                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2799                     new_ext = info_dict['ext']
2800
2801                     def correct_ext(filename, ext=new_ext):
2802                         if filename == '-':
2803                             return filename
2804                         filename_real_ext = os.path.splitext(filename)[1][1:]
2805                         filename_wo_ext = (
2806                             os.path.splitext(filename)[0]
2807                             if filename_real_ext in (old_ext, new_ext)
2808                             else filename)
2809                         return '%s.%s' % (filename_wo_ext, ext)
2810
2811                     # Ensure filename always has a correct extension for successful merge
2812                     full_filename = correct_ext(full_filename)
2813                     temp_filename = correct_ext(temp_filename)
2814                     dl_filename = existing_file(full_filename, temp_filename)
2815                     info_dict['__real_download'] = False
2816
2817                     if dl_filename is not None:
2818                         self.report_file_already_downloaded(dl_filename)
2819                     elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
2820                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2821                         success, real_download = self.dl(temp_filename, info_dict)
2822                         info_dict['__real_download'] = real_download
2823                     else:
2824                         downloaded = []
2825                         merger = FFmpegMergerPP(self)
2826                         if self.params.get('allow_unplayable_formats'):
2827                             self.report_warning(
2828                                 'You have requested merging of multiple formats '
2829                                 'while also allowing unplayable formats to be downloaded. '
2830                                 'The formats won\'t be merged to prevent data corruption.')
2831                         elif not merger.available:
2832                             self.report_warning(
2833                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2834                                 'The formats won\'t be merged.')
2835
2836                         if temp_filename == '-':
2837                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
2838                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2839                                       else 'but ffmpeg is not installed')
2840                             self.report_warning(
2841                                 f'You have requested downloading multiple formats to stdout {reason}. '
2842                                 'The formats will be streamed one after the other')
2843                             fname = temp_filename
2844                         for f in requested_formats:
2845                             new_info = dict(info_dict)
2846                             del new_info['requested_formats']
2847                             new_info.update(f)
2848                             if temp_filename != '-':
2849                                 fname = prepend_extension(
2850                                     correct_ext(temp_filename, new_info['ext']),
2851                                     'f%s' % f['format_id'], new_info['ext'])
2852                                 if not self._ensure_dir_exists(fname):
2853                                     return
2854                                 f['filepath'] = fname
2855                                 downloaded.append(fname)
2856                             partial_success, real_download = self.dl(fname, new_info)
2857                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2858                             success = success and partial_success
2859                         if merger.available and not self.params.get('allow_unplayable_formats'):
2860                             info_dict['__postprocessors'].append(merger)
2861                             info_dict['__files_to_merge'] = downloaded
2862                             # Even if there were no downloads, it is being merged only now
2863                             info_dict['__real_download'] = True
2864                         else:
2865                             for file in downloaded:
2866                                 files_to_move[file] = None
2867                 else:
2868                     # Just a single file
2869                     dl_filename = existing_file(full_filename, temp_filename)
2870                     if dl_filename is None or dl_filename == temp_filename:
2871                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2872                         # So we should try to resume the download
2873                         success, real_download = self.dl(temp_filename, info_dict)
2874                         info_dict['__real_download'] = real_download
2875                     else:
2876                         self.report_file_already_downloaded(dl_filename)
2877
2878                 dl_filename = dl_filename or temp_filename
2879                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2880
2881             except network_exceptions as err:
2882                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2883                 return
2884             except (OSError, IOError) as err:
2885                 raise UnavailableVideoError(err)
2886             except (ContentTooShortError, ) as err:
2887                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2888                 return
2889
2890             if success and full_filename != '-':
2891
2892                 def fixup():
2893                     do_fixup = True
2894                     fixup_policy = self.params.get('fixup')
2895                     vid = info_dict['id']
2896
2897                     if fixup_policy in ('ignore', 'never'):
2898                         return
2899                     elif fixup_policy == 'warn':
2900                         do_fixup = False
2901                     elif fixup_policy != 'force':
2902                         assert fixup_policy in ('detect_or_warn', None)
2903                         if not info_dict.get('__real_download'):
2904                             do_fixup = False
2905
2906                     def ffmpeg_fixup(cndn, msg, cls):
2907                         if not cndn:
2908                             return
2909                         if not do_fixup:
2910                             self.report_warning(f'{vid}: {msg}')
2911                             return
2912                         pp = cls(self)
2913                         if pp.available:
2914                             info_dict['__postprocessors'].append(pp)
2915                         else:
2916                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2917
2918                     stretched_ratio = info_dict.get('stretched_ratio')
2919                     ffmpeg_fixup(
2920                         stretched_ratio not in (1, None),
2921                         f'Non-uniform pixel ratio {stretched_ratio}',
2922                         FFmpegFixupStretchedPP)
2923
2924                     ffmpeg_fixup(
2925                         (info_dict.get('requested_formats') is None
2926                          and info_dict.get('container') == 'm4a_dash'
2927                          and info_dict.get('ext') == 'm4a'),
2928                         'writing DASH m4a. Only some players support this container',
2929                         FFmpegFixupM4aPP)
2930
2931                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
2932                     downloader = downloader.__name__ if downloader else None
2933                     ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
2934                                  'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2935                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2936                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2937
2938                 fixup()
2939                 try:
2940                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2941                 except PostProcessingError as err:
2942                     self.report_error('Postprocessing: %s' % str(err))
2943                     return
2944                 try:
2945                     for ph in self._post_hooks:
2946                         ph(info_dict['filepath'])
2947                 except Exception as err:
2948                     self.report_error('post hooks: %s' % str(err))
2949                     return
2950                 must_record_download_archive = True
2951
2952         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2953             self.record_download_archive(info_dict)
2954         max_downloads = self.params.get('max_downloads')
2955         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2956             raise MaxDownloadsReached()
2957
2958     def __download_wrapper(self, func):
2959         @functools.wraps(func)
2960         def wrapper(*args, **kwargs):
2961             try:
2962                 res = func(*args, **kwargs)
2963             except UnavailableVideoError as e:
2964                 self.report_error(e)
2965             except DownloadCancelled as e:
2966                 self.to_screen(f'[info] {e}')
2967                 raise
2968             else:
2969                 if self.params.get('dump_single_json', False):
2970                     self.post_extract(res)
2971                     self.to_stdout(json.dumps(self.sanitize_info(res)))
2972         return wrapper
2973
2974     def download(self, url_list):
2975         """Download a given list of URLs."""
2976         url_list = variadic(url_list)  # Passing a single URL is a common mistake
2977         outtmpl = self.outtmpl_dict['default']
2978         if (len(url_list) > 1
2979                 and outtmpl != '-'
2980                 and '%' not in outtmpl
2981                 and self.params.get('max_downloads') != 1):
2982             raise SameFileError(outtmpl)
2983
2984         for url in url_list:
2985             self.__download_wrapper(self.extract_info)(
2986                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2987
2988         return self._download_retcode
2989
2990     def download_with_info_file(self, info_filename):
2991         with contextlib.closing(fileinput.FileInput(
2992                 [info_filename], mode='r',
2993                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2994             # FileInput doesn't have a read method, we can't call json.load
2995             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2996         try:
2997             self.__download_wrapper(self.process_ie_result)(info, download=True)
2998         except (DownloadError, EntryNotInPlaylist, ThrottledDownload) as e:
2999             if not isinstance(e, EntryNotInPlaylist):
3000                 self.to_stderr('\r')
3001             webpage_url = info.get('webpage_url')
3002             if webpage_url is not None:
3003                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3004                 return self.download([webpage_url])
3005             else:
3006                 raise
3007         return self._download_retcode
3008
3009     @staticmethod
3010     def sanitize_info(info_dict, remove_private_keys=False):
3011         ''' Sanitize the infodict for converting to json '''
3012         if info_dict is None:
3013             return info_dict
3014         info_dict.setdefault('epoch', int(time.time()))
3015         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3016         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3017         if remove_private_keys:
3018             remove_keys |= {
3019                 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
3020                 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3021             }
3022             empty_values = (None, {}, [], set(), tuple())
3023             reject = lambda k, v: k not in keep_keys and (
3024                 k.startswith('_') or k in remove_keys or v in empty_values)
3025         else:
3026             reject = lambda k, v: k in remove_keys
3027         filter_fn = lambda obj: (
3028             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
3029             else obj if not isinstance(obj, dict)
3030             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
3031         return filter_fn(info_dict)
3032
3033     @staticmethod
3034     def filter_requested_info(info_dict, actually_filter=True):
3035         ''' Alias of sanitize_info for backward compatibility '''
3036         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3037
3038     def run_pp(self, pp, infodict):
3039         files_to_delete = []
3040         if '__files_to_move' not in infodict:
3041             infodict['__files_to_move'] = {}
3042         try:
3043             files_to_delete, infodict = pp.run(infodict)
3044         except PostProcessingError as e:
3045             # Must be True and not 'only_download'
3046             if self.params.get('ignoreerrors') is True:
3047                 self.report_error(e)
3048                 return infodict
3049             raise
3050
3051         if not files_to_delete:
3052             return infodict
3053         if self.params.get('keepvideo', False):
3054             for f in files_to_delete:
3055                 infodict['__files_to_move'].setdefault(f, '')
3056         else:
3057             for old_filename in set(files_to_delete):
3058                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3059                 try:
3060                     os.remove(encodeFilename(old_filename))
3061                 except (IOError, OSError):
3062                     self.report_warning('Unable to remove downloaded original file')
3063                 if old_filename in infodict['__files_to_move']:
3064                     del infodict['__files_to_move'][old_filename]
3065         return infodict
3066
3067     @staticmethod
3068     def post_extract(info_dict):
3069         def actual_post_extract(info_dict):
3070             if info_dict.get('_type') in ('playlist', 'multi_video'):
3071                 for video_dict in info_dict.get('entries', {}):
3072                     actual_post_extract(video_dict or {})
3073                 return
3074
3075             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3076             extra = post_extractor().items()
3077             info_dict.update(extra)
3078             info_dict.pop('__post_extractor', None)
3079
3080             original_infodict = info_dict.get('__original_infodict') or {}
3081             original_infodict.update(extra)
3082             original_infodict.pop('__post_extractor', None)
3083
3084         actual_post_extract(info_dict or {})
3085
3086     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3087         info = dict(ie_info)
3088         info['__files_to_move'] = files_to_move or {}
3089         for pp in self._pps[key]:
3090             info = self.run_pp(pp, info)
3091         return info, info.pop('__files_to_move', None)
3092
3093     def post_process(self, filename, ie_info, files_to_move=None):
3094         """Run all the postprocessors on the given file."""
3095         info = dict(ie_info)
3096         info['filepath'] = filename
3097         info['__files_to_move'] = files_to_move or {}
3098
3099         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3100             info = self.run_pp(pp, info)
3101         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3102         del info['__files_to_move']
3103         for pp in self._pps['after_move']:
3104             info = self.run_pp(pp, info)
3105         return info
3106
3107     def _make_archive_id(self, info_dict):
3108         video_id = info_dict.get('id')
3109         if not video_id:
3110             return
3111         # Future-proof against any change in case
3112         # and backwards compatibility with prior versions
3113         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3114         if extractor is None:
3115             url = str_or_none(info_dict.get('url'))
3116             if not url:
3117                 return
3118             # Try to find matching extractor for the URL and take its ie_key
3119             for ie_key, ie in self._ies.items():
3120                 if ie.suitable(url):
3121                     extractor = ie_key
3122                     break
3123             else:
3124                 return
3125         return '%s %s' % (extractor.lower(), video_id)
3126
3127     def in_download_archive(self, info_dict):
3128         fn = self.params.get('download_archive')
3129         if fn is None:
3130             return False
3131
3132         vid_id = self._make_archive_id(info_dict)
3133         if not vid_id:
3134             return False  # Incomplete video information
3135
3136         return vid_id in self.archive
3137
3138     def record_download_archive(self, info_dict):
3139         fn = self.params.get('download_archive')
3140         if fn is None:
3141             return
3142         vid_id = self._make_archive_id(info_dict)
3143         assert vid_id
3144         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3145             archive_file.write(vid_id + '\n')
3146         self.archive.add(vid_id)
3147
3148     @staticmethod
3149     def format_resolution(format, default='unknown'):
3150         is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none'
3151         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3152             return 'audio only'
3153         if format.get('resolution') is not None:
3154             return format['resolution']
3155         if format.get('width') and format.get('height'):
3156             res = '%dx%d' % (format['width'], format['height'])
3157         elif format.get('height'):
3158             res = '%sp' % format['height']
3159         elif format.get('width'):
3160             res = '%dx?' % format['width']
3161         elif is_images:
3162             return 'images'
3163         else:
3164             return default
3165         return f'img {res}' if is_images else res
3166
3167     def _format_note(self, fdict):
3168         res = ''
3169         if fdict.get('ext') in ['f4f', 'f4m']:
3170             res += '(unsupported) '
3171         if fdict.get('language'):
3172             if res:
3173                 res += ' '
3174             res += '[%s] ' % fdict['language']
3175         if fdict.get('format_note') is not None:
3176             res += fdict['format_note'] + ' '
3177         if fdict.get('tbr') is not None:
3178             res += '%4dk ' % fdict['tbr']
3179         if fdict.get('container') is not None:
3180             if res:
3181                 res += ', '
3182             res += '%s container' % fdict['container']
3183         if (fdict.get('vcodec') is not None
3184                 and fdict.get('vcodec') != 'none'):
3185             if res:
3186                 res += ', '
3187             res += fdict['vcodec']
3188             if fdict.get('vbr') is not None:
3189                 res += '@'
3190         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3191             res += 'video@'
3192         if fdict.get('vbr') is not None:
3193             res += '%4dk' % fdict['vbr']
3194         if fdict.get('fps') is not None:
3195             if res:
3196                 res += ', '
3197             res += '%sfps' % fdict['fps']
3198         if fdict.get('acodec') is not None:
3199             if res:
3200                 res += ', '
3201             if fdict['acodec'] == 'none':
3202                 res += 'video only'
3203             else:
3204                 res += '%-5s' % fdict['acodec']
3205         elif fdict.get('abr') is not None:
3206             if res:
3207                 res += ', '
3208             res += 'audio'
3209         if fdict.get('abr') is not None:
3210             res += '@%3dk' % fdict['abr']
3211         if fdict.get('asr') is not None:
3212             res += ' (%5dHz)' % fdict['asr']
3213         if fdict.get('filesize') is not None:
3214             if res:
3215                 res += ', '
3216             res += format_bytes(fdict['filesize'])
3217         elif fdict.get('filesize_approx') is not None:
3218             if res:
3219                 res += ', '
3220             res += '~' + format_bytes(fdict['filesize_approx'])
3221         return res
3222
3223     def _list_format_headers(self, *headers):
3224         if self.params.get('listformats_table', True) is not False:
3225             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3226         return headers
3227
3228     def list_formats(self, info_dict):
3229         formats = info_dict.get('formats', [info_dict])
3230         new_format = self.params.get('listformats_table', True) is not False
3231         if new_format:
3232             tbr_digits = number_of_digits(max(f.get('tbr') or 0 for f in formats))
3233             vbr_digits = number_of_digits(max(f.get('vbr') or 0 for f in formats))
3234             abr_digits = number_of_digits(max(f.get('abr') or 0 for f in formats))
3235             delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3236             table = [
3237                 [
3238                     self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3239                     format_field(f, 'ext'),
3240                     self.format_resolution(f),
3241                     format_field(f, 'fps', '%3d'),
3242                     format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3243                     delim,
3244                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3245                     format_field(f, 'tbr', f'%{tbr_digits}dk'),
3246                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3247                     delim,
3248                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
3249                     format_field(f, 'vbr', f'%{vbr_digits}dk'),
3250                     format_field(f, 'acodec', default='unknown').replace('none', ''),
3251                     format_field(f, 'abr', f'%{abr_digits}dk'),
3252                     format_field(f, 'asr', '%5dHz'),
3253                     join_nonempty(
3254                         self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3255                         format_field(f, 'language', '[%s]'),
3256                         format_field(f, 'format_note'),
3257                         format_field(f, 'container', ignore=(None, f.get('ext'))),
3258                         delim=', '),
3259                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3260             header_line = self._list_format_headers(
3261                 'ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', delim, ' FILESIZE', '  TBR', 'PROTO',
3262                 delim, 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO')
3263         else:
3264             table = [
3265                 [
3266                     format_field(f, 'format_id'),
3267                     format_field(f, 'ext'),
3268                     self.format_resolution(f),
3269                     self._format_note(f)]
3270                 for f in formats
3271                 if f.get('preference') is None or f['preference'] >= -1000]
3272             header_line = ['format code', 'extension', 'resolution', 'note']
3273
3274         self.to_screen(
3275             '[info] Available formats for %s:' % info_dict['id'])
3276         self.to_stdout(render_table(
3277             header_line, table,
3278             extraGap=(0 if new_format else 1),
3279             hideEmpty=new_format,
3280             delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3281
3282     def list_thumbnails(self, info_dict):
3283         thumbnails = list(info_dict.get('thumbnails'))
3284         if not thumbnails:
3285             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3286             return
3287
3288         self.to_screen(
3289             '[info] Thumbnails for %s:' % info_dict['id'])
3290         self.to_stdout(render_table(
3291             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3292             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3293
3294     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3295         if not subtitles:
3296             self.to_screen('%s has no %s' % (video_id, name))
3297             return
3298         self.to_screen(
3299             'Available %s for %s:' % (name, video_id))
3300
3301         def _row(lang, formats):
3302             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3303             if len(set(names)) == 1:
3304                 names = [] if names[0] == 'unknown' else names[:1]
3305             return [lang, ', '.join(names), ', '.join(exts)]
3306
3307         self.to_stdout(render_table(
3308             self._list_format_headers('Language', 'Name', 'Formats'),
3309             [_row(lang, formats) for lang, formats in subtitles.items()],
3310             hideEmpty=True))
3311
3312     def urlopen(self, req):
3313         """ Start an HTTP download """
3314         if isinstance(req, compat_basestring):
3315             req = sanitized_Request(req)
3316         return self._opener.open(req, timeout=self._socket_timeout)
3317
3318     def print_debug_header(self):
3319         if not self.params.get('verbose'):
3320             return
3321
3322         def get_encoding(stream):
3323             ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3324             if not supports_terminal_sequences(stream):
3325                 ret += ' (No ANSI)'
3326             return ret
3327
3328         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3329             locale.getpreferredencoding(),
3330             sys.getfilesystemencoding(),
3331             get_encoding(self._screen_file), get_encoding(self._err_file),
3332             self.get_encoding())
3333
3334         logger = self.params.get('logger')
3335         if logger:
3336             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3337             write_debug(encoding_str)
3338         else:
3339             write_string(f'[debug] {encoding_str}\n', encoding=None)
3340             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3341
3342         source = detect_variant()
3343         write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
3344         if not _LAZY_LOADER:
3345             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3346                 write_debug('Lazy loading extractors is forcibly disabled')
3347             else:
3348                 write_debug('Lazy loading extractors is disabled')
3349         if plugin_extractors or plugin_postprocessors:
3350             write_debug('Plugins: %s' % [
3351                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3352                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3353         if self.params.get('compat_opts'):
3354             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3355         try:
3356             sp = Popen(
3357                 ['git', 'rev-parse', '--short', 'HEAD'],
3358                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3359                 cwd=os.path.dirname(os.path.abspath(__file__)))
3360             out, err = sp.communicate_or_kill()
3361             out = out.decode().strip()
3362             if re.match('[0-9a-f]+', out):
3363                 write_debug('Git HEAD: %s' % out)
3364         except Exception:
3365             try:
3366                 sys.exc_clear()
3367             except Exception:
3368                 pass
3369
3370         def python_implementation():
3371             impl_name = platform.python_implementation()
3372             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3373                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3374             return impl_name
3375
3376         write_debug('Python version %s (%s %s) - %s' % (
3377             platform.python_version(),
3378             python_implementation(),
3379             platform.architecture()[0],
3380             platform_name()))
3381
3382         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3383         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3384         if ffmpeg_features:
3385             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3386
3387         exe_versions['rtmpdump'] = rtmpdump_version()
3388         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3389         exe_str = ', '.join(
3390             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3391         ) or 'none'
3392         write_debug('exe versions: %s' % exe_str)
3393
3394         from .downloader.websocket import has_websockets
3395         from .postprocessor.embedthumbnail import has_mutagen
3396         from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3397
3398         lib_str = join_nonempty(
3399             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3400             KEYRING_AVAILABLE and 'keyring',
3401             has_mutagen and 'mutagen',
3402             SQLITE_AVAILABLE and 'sqlite',
3403             has_websockets and 'websockets',
3404             delim=', ') or 'none'
3405         write_debug('Optional libraries: %s' % lib_str)
3406
3407         proxy_map = {}
3408         for handler in self._opener.handlers:
3409             if hasattr(handler, 'proxies'):
3410                 proxy_map.update(handler.proxies)
3411         write_debug(f'Proxy map: {proxy_map}')
3412
3413         # Not implemented
3414         if False and self.params.get('call_home'):
3415             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3416             write_debug('Public IP address: %s' % ipaddr)
3417             latest_version = self.urlopen(
3418                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3419             if version_tuple(latest_version) > version_tuple(__version__):
3420                 self.report_warning(
3421                     'You are using an outdated version (newest version: %s)! '
3422                     'See https://yt-dl.org/update if you need help updating.' %
3423                     latest_version)
3424
3425     def _setup_opener(self):
3426         timeout_val = self.params.get('socket_timeout')
3427         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3428
3429         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3430         opts_cookiefile = self.params.get('cookiefile')
3431         opts_proxy = self.params.get('proxy')
3432
3433         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3434
3435         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3436         if opts_proxy is not None:
3437             if opts_proxy == '':
3438                 proxies = {}
3439             else:
3440                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3441         else:
3442             proxies = compat_urllib_request.getproxies()
3443             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3444             if 'http' in proxies and 'https' not in proxies:
3445                 proxies['https'] = proxies['http']
3446         proxy_handler = PerRequestProxyHandler(proxies)
3447
3448         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3449         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3450         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3451         redirect_handler = YoutubeDLRedirectHandler()
3452         data_handler = compat_urllib_request_DataHandler()
3453
3454         # When passing our own FileHandler instance, build_opener won't add the
3455         # default FileHandler and allows us to disable the file protocol, which
3456         # can be used for malicious purposes (see
3457         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3458         file_handler = compat_urllib_request.FileHandler()
3459
3460         def file_open(*args, **kwargs):
3461             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3462         file_handler.file_open = file_open
3463
3464         opener = compat_urllib_request.build_opener(
3465             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3466
3467         # Delete the default user-agent header, which would otherwise apply in
3468         # cases where our custom HTTP handler doesn't come into play
3469         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3470         opener.addheaders = []
3471         self._opener = opener
3472
3473     def encode(self, s):
3474         if isinstance(s, bytes):
3475             return s  # Already encoded
3476
3477         try:
3478             return s.encode(self.get_encoding())
3479         except UnicodeEncodeError as err:
3480             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3481             raise
3482
3483     def get_encoding(self):
3484         encoding = self.params.get('encoding')
3485         if encoding is None:
3486             encoding = preferredencoding()
3487         return encoding
3488
3489     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3490         ''' Write infojson and returns True = written, False = skip, None = error '''
3491         if overwrite is None:
3492             overwrite = self.params.get('overwrites', True)
3493         if not self.params.get('writeinfojson'):
3494             return False
3495         elif not infofn:
3496             self.write_debug(f'Skipping writing {label} infojson')
3497             return False
3498         elif not self._ensure_dir_exists(infofn):
3499             return None
3500         elif not overwrite and os.path.exists(infofn):
3501             self.to_screen(f'[info] {label.title()} metadata is already present')
3502         else:
3503             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3504             try:
3505                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3506             except (OSError, IOError):
3507                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3508                 return None
3509         return True
3510
3511     def _write_description(self, label, ie_result, descfn):
3512         ''' Write description and returns True = written, False = skip, None = error '''
3513         if not self.params.get('writedescription'):
3514             return False
3515         elif not descfn:
3516             self.write_debug(f'Skipping writing {label} description')
3517             return False
3518         elif not self._ensure_dir_exists(descfn):
3519             return None
3520         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3521             self.to_screen(f'[info] {label.title()} description is already present')
3522         elif ie_result.get('description') is None:
3523             self.report_warning(f'There\'s no {label} description to write')
3524             return False
3525         else:
3526             try:
3527                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3528                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3529                     descfile.write(ie_result['description'])
3530             except (OSError, IOError):
3531                 self.report_error(f'Cannot write {label} description file {descfn}')
3532                 return None
3533         return True
3534
3535     def _write_subtitles(self, info_dict, filename):
3536         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3537         ret = []
3538         subtitles = info_dict.get('requested_subtitles')
3539         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3540             # subtitles download errors are already managed as troubles in relevant IE
3541             # that way it will silently go on when used with unsupporting IE
3542             return ret
3543
3544         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3545         if not sub_filename_base:
3546             self.to_screen('[info] Skipping writing video subtitles')
3547             return ret
3548         for sub_lang, sub_info in subtitles.items():
3549             sub_format = sub_info['ext']
3550             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3551             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3552             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3553                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3554                 sub_info['filepath'] = sub_filename
3555                 ret.append((sub_filename, sub_filename_final))
3556                 continue
3557
3558             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3559             if sub_info.get('data') is not None:
3560                 try:
3561                     # Use newline='' to prevent conversion of newline characters
3562                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3563                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3564                         subfile.write(sub_info['data'])
3565                     sub_info['filepath'] = sub_filename
3566                     ret.append((sub_filename, sub_filename_final))
3567                     continue
3568                 except (OSError, IOError):
3569                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3570                     return None
3571
3572             try:
3573                 sub_copy = sub_info.copy()
3574                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3575                 self.dl(sub_filename, sub_copy, subtitle=True)
3576                 sub_info['filepath'] = sub_filename
3577                 ret.append((sub_filename, sub_filename_final))
3578             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3579                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3580                 continue
3581         return ret
3582
3583     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3584         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3585         write_all = self.params.get('write_all_thumbnails', False)
3586         thumbnails, ret = [], []
3587         if write_all or self.params.get('writethumbnail', False):
3588             thumbnails = info_dict.get('thumbnails') or []
3589         multiple = write_all and len(thumbnails) > 1
3590
3591         if thumb_filename_base is None:
3592             thumb_filename_base = filename
3593         if thumbnails and not thumb_filename_base:
3594             self.write_debug(f'Skipping writing {label} thumbnail')
3595             return ret
3596
3597         for t in thumbnails[::-1]:
3598             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3599             thumb_display_id = f'{label} thumbnail {t["id"]}'
3600             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3601             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3602
3603             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3604                 ret.append((thumb_filename, thumb_filename_final))
3605                 t['filepath'] = thumb_filename
3606                 self.to_screen('[info] %s is already present' % (
3607                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3608             else:
3609                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3610                 try:
3611                     uf = self.urlopen(t['url'])
3612                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3613                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3614                         shutil.copyfileobj(uf, thumbf)
3615                     ret.append((thumb_filename, thumb_filename_final))
3616                     t['filepath'] = thumb_filename
3617                 except network_exceptions as err:
3618                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3619             if ret and not write_all:
3620                 break
3621         return ret