yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import functools
  13 import io
  14 import itertools
  15 import json
  16 import locale
  17 import operator
  18 import os
  19 import platform
  20 import re
  21 import shutil
  22 import subprocess
  23 import sys
  24 import tempfile
  25 import time
  26 import tokenize
  27 import traceback
  28 import random
  29 import unicodedata
  30
  31 from enum import Enum
  32 from string import ascii_letters
  33
  34 from .compat import (
  35     compat_basestring,
  36     compat_get_terminal_size,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_pycrypto_AES,
  41     compat_shlex_quote,
  42     compat_str,
  43     compat_tokenize_tokenize,
  44     compat_urllib_error,
  45     compat_urllib_request,
  46     compat_urllib_request_DataHandler,
  47     windows_enable_vt_mode,
  48 )
  49 from .cookies import load_cookies
  50 from .utils import (
  51     age_restricted,
  52     args_to_str,
  53     ContentTooShortError,
  54     date_from_str,
  55     DateRange,
  56     DEFAULT_OUTTMPL,
  57     determine_ext,
  58     determine_protocol,
  59     DownloadCancelled,
  60     DownloadError,
  61     encode_compat_str,
  62     encodeFilename,
  63     EntryNotInPlaylist,
  64     error_to_compat_str,
  65     ExistingVideoReached,
  66     expand_path,
  67     ExtractorError,
  68     float_or_none,
  69     format_bytes,
  70     format_field,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     HEADRequest,
  74     int_or_none,
  75     iri_to_uri,
  76     ISO3166Utils,
  77     join_nonempty,
  78     LazyList,
  79     LINK_TEMPLATES,
  80     locked_file,
  81     make_dir,
  82     make_HTTPS_handler,
  83     MaxDownloadsReached,
  84     network_exceptions,
  85     number_of_digits,
  86     orderedSet,
  87     OUTTMPL_TYPES,
  88     PagedList,
  89     parse_filesize,
  90     PerRequestProxyHandler,
  91     platform_name,
  92     Popen,
  93     PostProcessingError,
  94     preferredencoding,
  95     prepend_extension,
  96     register_socks_protocols,
  97     RejectedVideoReached,
  98     render_table,
  99     replace_extension,
 100     SameFileError,
 101     sanitize_filename,
 102     sanitize_path,
 103     sanitize_url,
 104     sanitized_Request,
 105     std_headers,
 106     STR_FORMAT_RE_TMPL,
 107     STR_FORMAT_TYPES,
 108     str_or_none,
 109     strftime_or_none,
 110     subtitles_filename,
 111     supports_terminal_sequences,
 112     ThrottledDownload,
 113     to_high_limit_path,
 114     traverse_obj,
 115     try_get,
 116     UnavailableVideoError,
 117     url_basename,
 118     variadic,
 119     version_tuple,
 120     write_json_file,
 121     write_string,
 122     YoutubeDLCookieProcessor,
 123     YoutubeDLHandler,
 124     YoutubeDLRedirectHandler,
 125 )
 126 from .cache import Cache
 127 from .minicurses import format_text
 128 from .extractor import (
 129     gen_extractor_classes,
 130     get_info_extractor,
 131     _LAZY_LOADER,
 132     _PLUGIN_CLASSES as plugin_extractors
 133 )
 134 from .extractor.openload import PhantomJSwrapper
 135 from .downloader import (
 136     FFmpegFD,
 137     get_suitable_downloader,
 138     shorten_protocol_name
 139 )
 140 from .downloader.rtmp import rtmpdump_version
 141 from .postprocessor import (
 142     get_postprocessor,
 143     EmbedThumbnailPP,
 144     FFmpegFixupDurationPP,
 145     FFmpegFixupM3u8PP,
 146     FFmpegFixupM4aPP,
 147     FFmpegFixupStretchedPP,
 148     FFmpegFixupTimestampPP,
 149     FFmpegMergerPP,
 150     FFmpegPostProcessor,
 151     MoveFilesAfterDownloadPP,
 152     _PLUGIN_CLASSES as plugin_postprocessors
 153 )
 154 from .update import detect_variant
 155 from .version import __version__
 156
 157 if compat_os_name == 'nt':
 158     import ctypes
 159
 160
 161 class YoutubeDL(object):
 162     """YoutubeDL class.
 163
 164     YoutubeDL objects are the ones responsible of downloading the
 165     actual video file and writing it to disk if the user has requested
 166     it, among some other tasks. In most cases there should be one per
 167     program. As, given a video URL, the downloader doesn't know how to
 168     extract all the needed information, task that InfoExtractors do, it
 169     has to pass the URL to one of them.
 170
 171     For this, YoutubeDL objects have a method that allows
 172     InfoExtractors to be registered in a given order. When it is passed
 173     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 174     finds that reports being able to handle it. The InfoExtractor extracts
 175     all the information about the video or videos the URL refers to, and
 176     YoutubeDL process the extracted information, possibly using a File
 177     Downloader to download the video.
 178
 179     YoutubeDL objects accept a lot of parameters. In order not to saturate
 180     the object constructor with arguments, it receives a dictionary of
 181     options instead. These options are available through the params
 182     attribute for the InfoExtractors to use. The YoutubeDL also
 183     registers itself as the downloader in charge for the InfoExtractors
 184     that are added to it, so this is a "mutual registration".
 185
 186     Available options:
 187
 188     username:          Username for authentication purposes.
 189     password:          Password for authentication purposes.
 190     videopassword:     Password for accessing a video.
 191     ap_mso:            Adobe Pass multiple-system operator identifier.
 192     ap_username:       Multiple-system operator account username.
 193     ap_password:       Multiple-system operator account password.
 194     usenetrc:          Use netrc for authentication instead.
 195     verbose:           Print additional info to stdout.
 196     quiet:             Do not print messages to stdout.
 197     no_warnings:       Do not print out anything for warnings.
 198     forceprint:        A list of templates to force print
 199     forceurl:          Force printing final URL. (Deprecated)
 200     forcetitle:        Force printing title. (Deprecated)
 201     forceid:           Force printing ID. (Deprecated)
 202     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 203     forcedescription:  Force printing description. (Deprecated)
 204     forcefilename:     Force printing final filename. (Deprecated)
 205     forceduration:     Force printing duration. (Deprecated)
 206     forcejson:         Force printing info_dict as JSON.
 207     dump_single_json:  Force printing the info_dict of the whole playlist
 208                        (or video) as a single JSON line.
 209     force_write_download_archive: Force writing download archive regardless
 210                        of 'skip_download' or 'simulate'.
 211     simulate:          Do not download the video files. If unset (or None),
 212                        simulate only if listsubtitles, listformats or list_thumbnails is used
 213     format:            Video format code. see "FORMAT SELECTION" for more details.
 214                        You can also pass a function. The function takes 'ctx' as
 215                        argument and returns the formats to download.
 216                        See "build_format_selector" for an implementation
 217     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 218     ignore_no_formats_error: Ignore "No video formats" error. Usefull for
 219                        extracting metadata even if the video is not actually
 220                        available for download (experimental)
 221     format_sort:       A list of fields by which to sort the video formats.
 222                        See "Sorting Formats" for more details.
 223     format_sort_force: Force the given format_sort. see "Sorting Formats"
 224                        for more details.
 225     allow_multiple_video_streams:   Allow multiple video streams to be merged
 226                        into a single file
 227     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 228                        into a single file
 229     check_formats      Whether to test if the formats are downloadable.
 230                        Can be True (check all), False (check none),
 231                        'selected' (check selected formats),
 232                        or None (check only if requested by extractor)
 233     paths:             Dictionary of output paths. The allowed keys are 'home'
 234                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 235     outtmpl:           Dictionary of templates for output names. Allowed keys
 236                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 237                        For compatibility with youtube-dl, a single string can also be used
 238     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 239     restrictfilenames: Do not allow "&" and spaces in file names
 240     trim_file_name:    Limit length of filename (extension excluded)
 241     windowsfilenames:  Force the filenames to be windows compatible
 242     ignoreerrors:      Do not stop on download/postprocessing errors.
 243                        Can be 'only_download' to ignore only download errors.
 244                        Default is 'only_download' for CLI, but False for API
 245     skip_playlist_after_errors: Number of allowed failures until the rest of
 246                        the playlist is skipped
 247     force_generic_extractor: Force downloader to use the generic extractor
 248     overwrites:        Overwrite all video and metadata files if True,
 249                        overwrite only non-video files if None
 250                        and don't overwrite any file if False
 251                        For compatibility with youtube-dl,
 252                        "nooverwrites" may also be used instead
 253     playliststart:     Playlist item to start at.
 254     playlistend:       Playlist item to end at.
 255     playlist_items:    Specific indices of playlist to download.
 256     playlistreverse:   Download playlist items in reverse order.
 257     playlistrandom:    Download playlist items in random order.
 258     matchtitle:        Download only matching titles.
 259     rejecttitle:       Reject downloads for matching titles.
 260     logger:            Log messages to a logging.Logger instance.
 261     logtostderr:       Log messages to stderr instead of stdout.
 262     consoletitle:       Display progress in console window's titlebar.
 263     writedescription:  Write the video description to a .description file
 264     writeinfojson:     Write the video description to a .info.json file
 265     clean_infojson:    Remove private fields from the infojson
 266     getcomments:       Extract video comments. This will not be written to disk
 267                        unless writeinfojson is also given
 268     writeannotations:  Write the video annotations to a .annotations.xml file
 269     writethumbnail:    Write the thumbnail image to a file
 270     allow_playlist_files: Whether to write playlists' description, infojson etc
 271                        also to disk when using the 'write*' options
 272     write_all_thumbnails:  Write all thumbnail formats to files
 273     writelink:         Write an internet shortcut file, depending on the
 274                        current platform (.url/.webloc/.desktop)
 275     writeurllink:      Write a Windows internet shortcut file (.url)
 276     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 277     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 278     writesubtitles:    Write the video subtitles to a file
 279     writeautomaticsub: Write the automatically generated subtitles to a file
 280     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 281                        Downloads all the subtitles of the video
 282                        (requires writesubtitles or writeautomaticsub)
 283     listsubtitles:     Lists all available subtitles for the video
 284     subtitlesformat:   The format code for subtitles
 285     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 286                        The list may contain "all" to refer to all the available
 287                        subtitles. The language can be prefixed with a "-" to
 288                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 289     keepvideo:         Keep the video file after post-processing
 290     daterange:         A DateRange object, download only if the upload_date is in the range.
 291     skip_download:     Skip the actual download of the video file
 292     cachedir:          Location of the cache files in the filesystem.
 293                        False to disable filesystem cache.
 294     noplaylist:        Download single video instead of a playlist if in doubt.
 295     age_limit:         An integer representing the user's age in years.
 296                        Unsuitable videos for the given age are skipped.
 297     min_views:         An integer representing the minimum view count the video
 298                        must have in order to not be skipped.
 299                        Videos without view count information are always
 300                        downloaded. None for no limit.
 301     max_views:         An integer representing the maximum view count.
 302                        Videos that are more popular than that are not
 303                        downloaded.
 304                        Videos without view count information are always
 305                        downloaded. None for no limit.
 306     download_archive:  File name of a file where all downloads are recorded.
 307                        Videos already present in the file are not downloaded
 308                        again.
 309     break_on_existing: Stop the download process after attempting to download a
 310                        file that is in the archive.
 311     break_on_reject:   Stop the download process when encountering a video that
 312                        has been filtered out.
 313     cookiefile:        File name where cookies should be read from and dumped to
 314     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 315                        name/path from where cookies are loaded.
 316                        Eg: ('chrome', ) or ('vivaldi', 'default')
 317     nocheckcertificate:Do not verify SSL certificates
 318     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 319                        At the moment, this is only supported by YouTube.
 320     proxy:             URL of the proxy server to use
 321     geo_verification_proxy:  URL of the proxy to use for IP address verification
 322                        on geo-restricted sites.
 323     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 324     bidi_workaround:   Work around buggy terminals without bidirectional text
 325                        support, using fridibi
 326     debug_printtraffic:Print out sent and received HTTP traffic
 327     include_ads:       Download ads as well
 328     default_search:    Prepend this string if an input url is not valid.
 329                        'auto' for elaborate guessing
 330     encoding:          Use this encoding instead of the system-specified.
 331     extract_flat:      Do not resolve URLs, return the immediate result.
 332                        Pass in 'in_playlist' to only show this behavior for
 333                        playlist items.
 334     postprocessors:    A list of dictionaries, each with an entry
 335                        * key:  The name of the postprocessor. See
 336                                yt_dlp/postprocessor/__init__.py for a list.
 337                        * when: When to run the postprocessor. Can be one of
 338                                pre_process|before_dl|post_process|after_move.
 339                                Assumed to be 'post_process' if not given
 340     post_hooks:        Deprecated - Register a custom postprocessor instead
 341                        A list of functions that get called as the final step
 342                        for each video file, after all postprocessors have been
 343                        called. The filename will be passed as the only argument.
 344     progress_hooks:    A list of functions that get called on download
 345                        progress, with a dictionary with the entries
 346                        * status: One of "downloading", "error", or "finished".
 347                                  Check this first and ignore unknown values.
 348                        * info_dict: The extracted info_dict
 349
 350                        If status is one of "downloading", or "finished", the
 351                        following properties may also be present:
 352                        * filename: The final filename (always present)
 353                        * tmpfilename: The filename we're currently writing to
 354                        * downloaded_bytes: Bytes on disk
 355                        * total_bytes: Size of the whole file, None if unknown
 356                        * total_bytes_estimate: Guess of the eventual file size,
 357                                                None if unavailable.
 358                        * elapsed: The number of seconds since download started.
 359                        * eta: The estimated time in seconds, None if unknown
 360                        * speed: The download speed in bytes/second, None if
 361                                 unknown
 362                        * fragment_index: The counter of the currently
 363                                          downloaded video fragment.
 364                        * fragment_count: The number of fragments (= individual
 365                                          files that will be merged)
 366
 367                        Progress hooks are guaranteed to be called at least once
 368                        (with status "finished") if the download is successful.
 369     postprocessor_hooks:  A list of functions that get called on postprocessing
 370                        progress, with a dictionary with the entries
 371                        * status: One of "started", "processing", or "finished".
 372                                  Check this first and ignore unknown values.
 373                        * postprocessor: Name of the postprocessor
 374                        * info_dict: The extracted info_dict
 375
 376                        Progress hooks are guaranteed to be called at least twice
 377                        (with status "started" and "finished") if the processing is successful.
 378     merge_output_format: Extension to use when merging formats.
 379     final_ext:         Expected final extension; used to detect when the file was
 380                        already downloaded and converted
 381     fixup:             Automatically correct known faults of the file.
 382                        One of:
 383                        - "never": do nothing
 384                        - "warn": only emit a warning
 385                        - "detect_or_warn": check whether we can do anything
 386                                            about it, warn otherwise (default)
 387     source_address:    Client-side IP address to bind to.
 388     call_home:         Boolean, true iff we are allowed to contact the
 389                        yt-dlp servers for debugging. (BROKEN)
 390     sleep_interval_requests: Number of seconds to sleep between requests
 391                        during extraction
 392     sleep_interval:    Number of seconds to sleep before each download when
 393                        used alone or a lower bound of a range for randomized
 394                        sleep before each download (minimum possible number
 395                        of seconds to sleep) when used along with
 396                        max_sleep_interval.
 397     max_sleep_interval:Upper bound of a range for randomized sleep before each
 398                        download (maximum possible number of seconds to sleep).
 399                        Must only be used along with sleep_interval.
 400                        Actual sleep time will be a random float from range
 401                        [sleep_interval; max_sleep_interval].
 402     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 403     listformats:       Print an overview of available video formats and exit.
 404     list_thumbnails:   Print a table of all thumbnails and exit.
 405     match_filter:      A function that gets called with the info_dict of
 406                        every video.
 407                        If it returns a message, the video is ignored.
 408                        If it returns None, the video is downloaded.
 409                        match_filter_func in utils.py is one example for this.
 410     no_color:          Do not emit color codes in output.
 411     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 412                        HTTP header
 413     geo_bypass_country:
 414                        Two-letter ISO 3166-2 country code that will be used for
 415                        explicit geographic restriction bypassing via faking
 416                        X-Forwarded-For HTTP header
 417     geo_bypass_ip_block:
 418                        IP range in CIDR notation that will be used similarly to
 419                        geo_bypass_country
 420
 421     The following options determine which downloader is picked:
 422     external_downloader: A dictionary of protocol keys and the executable of the
 423                        external downloader to use for it. The allowed protocols
 424                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 425                        Set the value to 'native' to use the native downloader
 426     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 427                        or {'m3u8': 'ffmpeg'} instead.
 428                        Use the native HLS downloader instead of ffmpeg/avconv
 429                        if True, otherwise use ffmpeg/avconv if False, otherwise
 430                        use downloader suggested by extractor if None.
 431     compat_opts:       Compatibility options. See "Differences in default behavior".
 432                        The following options do not work when used through the API:
 433                        filename, abort-on-error, multistreams, no-live-chat, format-sort
 434                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 435                        Refer __init__.py for their implementation
 436     progress_template: Dictionary of templates for progress outputs.
 437                        Allowed keys are 'download', 'postprocess',
 438                        'download-title' (console title) and 'postprocess-title'.
 439                        The template is mapped on a dictionary with keys 'progress' and 'info'
 440
 441     The following parameters are not used by YoutubeDL itself, they are used by
 442     the downloader (see yt_dlp/downloader/common.py):
 443     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 444     max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
 445     noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 446     external_downloader_args, concurrent_fragment_downloads.
 447
 448     The following options are used by the post processors:
 449     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 450                        otherwise prefer ffmpeg. (avconv support is deprecated)
 451     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 452                        to the binary or its containing directory.
 453     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 454                        and a list of additional command-line arguments for the
 455                        postprocessor/executable. The dict can also have "PP+EXE" keys
 456                        which are used when the given exe is used by the given PP.
 457                        Use 'default' as the name for arguments to passed to all PP
 458                        For compatibility with youtube-dl, a single list of args
 459                        can also be used
 460
 461     The following options are used by the extractors:
 462     extractor_retries: Number of times to retry for known errors
 463     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 464     hls_split_discontinuity: Split HLS playlists to different formats at
 465                        discontinuities such as ad breaks (default: False)
 466     extractor_args:    A dictionary of arguments to be passed to the extractors.
 467                        See "EXTRACTOR ARGUMENTS" for details.
 468                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 469     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 470                        If True (default), DASH manifests and related
 471                        data will be downloaded and processed by extractor.
 472                        You can reduce network I/O by disabling it if you don't
 473                        care about DASH. (only for youtube)
 474     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 475                        If True (default), HLS manifests and related
 476                        data will be downloaded and processed by extractor.
 477                        You can reduce network I/O by disabling it if you don't
 478                        care about HLS. (only for youtube)
 479     """
 480
 481     _NUMERIC_FIELDS = set((
 482         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 483         'timestamp', 'release_timestamp',
 484         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 485         'average_rating', 'comment_count', 'age_limit',
 486         'start_time', 'end_time',
 487         'chapter_number', 'season_number', 'episode_number',
 488         'track_number', 'disc_number', 'release_year',
 489     ))
 490
 491     _format_selection_exts = {
 492         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 493         'video': {'mp4', 'flv', 'webm', '3gp'},
 494         'storyboards': {'mhtml'},
 495     }
 496
 497     params = None
 498     _ies = {}
 499     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 500     _printed_messages = set()
 501     _first_webpage_request = True
 502     _download_retcode = None
 503     _num_downloads = None
 504     _playlist_level = 0
 505     _playlist_urls = set()
 506     _screen_file = None
 507
 508     def __init__(self, params=None, auto_init=True):
 509         """Create a FileDownloader object with the given options.
 510         @param auto_init    Whether to load the default extractors and print header (if verbose).
 511                             Set to 'no_verbose_header' to not print the header
 512         """
 513         if params is None:
 514             params = {}
 515         self._ies = {}
 516         self._ies_instances = {}
 517         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 518         self._printed_messages = set()
 519         self._first_webpage_request = True
 520         self._post_hooks = []
 521         self._progress_hooks = []
 522         self._postprocessor_hooks = []
 523         self._download_retcode = 0
 524         self._num_downloads = 0
 525         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 526         self._err_file = sys.stderr
 527         self.params = params
 528         self.cache = Cache(self)
 529
 530         windows_enable_vt_mode()
 531         self._allow_colors = {
 532             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 533             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 534         }
 535
 536         if sys.version_info < (3, 6):
 537             self.report_warning(
 538                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 539
 540         if self.params.get('allow_unplayable_formats'):
 541             self.report_warning(
 542                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 543                 'This is a developer option intended for debugging. \n'
 544                 '         If you experience any issues while using this option, '
 545                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 546
 547         def check_deprecated(param, option, suggestion):
 548             if self.params.get(param) is not None:
 549                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 550                 return True
 551             return False
 552
 553         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 554             if self.params.get('geo_verification_proxy') is None:
 555                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 556
 557         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 558         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 559         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 560
 561         for msg in self.params.get('_warnings', []):
 562             self.report_warning(msg)
 563
 564         if 'list-formats' in self.params.get('compat_opts', []):
 565             self.params['listformats_table'] = False
 566
 567         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 568             # nooverwrites was unnecessarily changed to overwrites
 569             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 570             # This ensures compatibility with both keys
 571             self.params['overwrites'] = not self.params['nooverwrites']
 572         elif self.params.get('overwrites') is None:
 573             self.params.pop('overwrites', None)
 574         else:
 575             self.params['nooverwrites'] = not self.params['overwrites']
 576
 577         if params.get('bidi_workaround', False):
 578             try:
 579                 import pty
 580                 master, slave = pty.openpty()
 581                 width = compat_get_terminal_size().columns
 582                 if width is None:
 583                     width_args = []
 584                 else:
 585                     width_args = ['-w', str(width)]
 586                 sp_kwargs = dict(
 587                     stdin=subprocess.PIPE,
 588                     stdout=slave,
 589                     stderr=self._err_file)
 590                 try:
 591                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 592                 except OSError:
 593                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 594                 self._output_channel = os.fdopen(master, 'rb')
 595             except OSError as ose:
 596                 if ose.errno == errno.ENOENT:
 597                     self.report_warning(
 598                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 599                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 600                 else:
 601                     raise
 602
 603         if (sys.platform != 'win32'
 604                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 605                 and not params.get('restrictfilenames', False)):
 606             # Unicode filesystem API will throw errors (#1474, #13027)
 607             self.report_warning(
 608                 'Assuming --restrict-filenames since file system encoding '
 609                 'cannot encode all characters. '
 610                 'Set the LC_ALL environment variable to fix this.')
 611             self.params['restrictfilenames'] = True
 612
 613         self.outtmpl_dict = self.parse_outtmpl()
 614
 615         # Creating format selector here allows us to catch syntax errors before the extraction
 616         self.format_selector = (
 617             None if self.params.get('format') is None
 618             else self.params['format'] if callable(self.params['format'])
 619             else self.build_format_selector(self.params['format']))
 620
 621         self._setup_opener()
 622
 623         if auto_init:
 624             if auto_init != 'no_verbose_header':
 625                 self.print_debug_header()
 626             self.add_default_info_extractors()
 627
 628         for pp_def_raw in self.params.get('postprocessors', []):
 629             pp_def = dict(pp_def_raw)
 630             when = pp_def.pop('when', 'post_process')
 631             pp_class = get_postprocessor(pp_def.pop('key'))
 632             pp = pp_class(self, **compat_kwargs(pp_def))
 633             self.add_post_processor(pp, when=when)
 634
 635         hooks = {
 636             'post_hooks': self.add_post_hook,
 637             'progress_hooks': self.add_progress_hook,
 638             'postprocessor_hooks': self.add_postprocessor_hook,
 639         }
 640         for opt, fn in hooks.items():
 641             for ph in self.params.get(opt, []):
 642                 fn(ph)
 643
 644         register_socks_protocols()
 645
 646         def preload_download_archive(fn):
 647             """Preload the archive, if any is specified"""
 648             if fn is None:
 649                 return False
 650             self.write_debug(f'Loading archive file {fn!r}')
 651             try:
 652                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 653                     for line in archive_file:
 654                         self.archive.add(line.strip())
 655             except IOError as ioe:
 656                 if ioe.errno != errno.ENOENT:
 657                     raise
 658                 return False
 659             return True
 660
 661         self.archive = set()
 662         preload_download_archive(self.params.get('download_archive'))
 663
 664     def warn_if_short_id(self, argv):
 665         # short YouTube ID starting with dash?
 666         idxs = [
 667             i for i, a in enumerate(argv)
 668             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 669         if idxs:
 670             correct_argv = (
 671                 ['yt-dlp']
 672                 + [a for i, a in enumerate(argv) if i not in idxs]
 673                 + ['--'] + [argv[i] for i in idxs]
 674             )
 675             self.report_warning(
 676                 'Long argument string detected. '
 677                 'Use -- to separate parameters and URLs, like this:\n%s' %
 678                 args_to_str(correct_argv))
 679
 680     def add_info_extractor(self, ie):
 681         """Add an InfoExtractor object to the end of the list."""
 682         ie_key = ie.ie_key()
 683         self._ies[ie_key] = ie
 684         if not isinstance(ie, type):
 685             self._ies_instances[ie_key] = ie
 686             ie.set_downloader(self)
 687
 688     def _get_info_extractor_class(self, ie_key):
 689         ie = self._ies.get(ie_key)
 690         if ie is None:
 691             ie = get_info_extractor(ie_key)
 692             self.add_info_extractor(ie)
 693         return ie
 694
 695     def get_info_extractor(self, ie_key):
 696         """
 697         Get an instance of an IE with name ie_key, it will try to get one from
 698         the _ies list, if there's no instance it will create a new one and add
 699         it to the extractor list.
 700         """
 701         ie = self._ies_instances.get(ie_key)
 702         if ie is None:
 703             ie = get_info_extractor(ie_key)()
 704             self.add_info_extractor(ie)
 705         return ie
 706
 707     def add_default_info_extractors(self):
 708         """
 709         Add the InfoExtractors returned by gen_extractors to the end of the list
 710         """
 711         for ie in gen_extractor_classes():
 712             self.add_info_extractor(ie)
 713
 714     def add_post_processor(self, pp, when='post_process'):
 715         """Add a PostProcessor object to the end of the chain."""
 716         self._pps[when].append(pp)
 717         pp.set_downloader(self)
 718
 719     def add_post_hook(self, ph):
 720         """Add the post hook"""
 721         self._post_hooks.append(ph)
 722
 723     def add_progress_hook(self, ph):
 724         """Add the download progress hook"""
 725         self._progress_hooks.append(ph)
 726
 727     def add_postprocessor_hook(self, ph):
 728         """Add the postprocessing progress hook"""
 729         self._postprocessor_hooks.append(ph)
 730
 731     def _bidi_workaround(self, message):
 732         if not hasattr(self, '_output_channel'):
 733             return message
 734
 735         assert hasattr(self, '_output_process')
 736         assert isinstance(message, compat_str)
 737         line_count = message.count('\n') + 1
 738         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 739         self._output_process.stdin.flush()
 740         res = ''.join(self._output_channel.readline().decode('utf-8')
 741                       for _ in range(line_count))
 742         return res[:-len('\n')]
 743
 744     def _write_string(self, message, out=None, only_once=False):
 745         if only_once:
 746             if message in self._printed_messages:
 747                 return
 748             self._printed_messages.add(message)
 749         write_string(message, out=out, encoding=self.params.get('encoding'))
 750
 751     def to_stdout(self, message, skip_eol=False, quiet=False):
 752         """Print message to stdout"""
 753         if self.params.get('logger'):
 754             self.params['logger'].debug(message)
 755         elif not quiet or self.params.get('verbose'):
 756             self._write_string(
 757                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 758                 self._err_file if quiet else self._screen_file)
 759
 760     def to_stderr(self, message, only_once=False):
 761         """Print message to stderr"""
 762         assert isinstance(message, compat_str)
 763         if self.params.get('logger'):
 764             self.params['logger'].error(message)
 765         else:
 766             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 767
 768     def to_console_title(self, message):
 769         if not self.params.get('consoletitle', False):
 770             return
 771         if compat_os_name == 'nt':
 772             if ctypes.windll.kernel32.GetConsoleWindow():
 773                 # c_wchar_p() might not be necessary if `message` is
 774                 # already of type unicode()
 775                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 776         elif 'TERM' in os.environ:
 777             self._write_string('\033]0;%s\007' % message, self._screen_file)
 778
 779     def save_console_title(self):
 780         if not self.params.get('consoletitle', False):
 781             return
 782         if self.params.get('simulate'):
 783             return
 784         if compat_os_name != 'nt' and 'TERM' in os.environ:
 785             # Save the title on stack
 786             self._write_string('\033[22;0t', self._screen_file)
 787
 788     def restore_console_title(self):
 789         if not self.params.get('consoletitle', False):
 790             return
 791         if self.params.get('simulate'):
 792             return
 793         if compat_os_name != 'nt' and 'TERM' in os.environ:
 794             # Restore the title from stack
 795             self._write_string('\033[23;0t', self._screen_file)
 796
 797     def __enter__(self):
 798         self.save_console_title()
 799         return self
 800
 801     def __exit__(self, *args):
 802         self.restore_console_title()
 803
 804         if self.params.get('cookiefile') is not None:
 805             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 806
 807     def trouble(self, message=None, tb=None):
 808         """Determine action to take when a download problem appears.
 809
 810         Depending on if the downloader has been configured to ignore
 811         download errors or not, this method may throw an exception or
 812         not when errors are found, after printing the message.
 813
 814         tb, if given, is additional traceback information.
 815         """
 816         if message is not None:
 817             self.to_stderr(message)
 818         if self.params.get('verbose'):
 819             if tb is None:
 820                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 821                     tb = ''
 822                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 823                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 824                     tb += encode_compat_str(traceback.format_exc())
 825                 else:
 826                     tb_data = traceback.format_list(traceback.extract_stack())
 827                     tb = ''.join(tb_data)
 828             if tb:
 829                 self.to_stderr(tb)
 830         if not self.params.get('ignoreerrors'):
 831             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 832                 exc_info = sys.exc_info()[1].exc_info
 833             else:
 834                 exc_info = sys.exc_info()
 835             raise DownloadError(message, exc_info)
 836         self._download_retcode = 1
 837
 838     def to_screen(self, message, skip_eol=False):
 839         """Print message to stdout if not in quiet mode"""
 840         self.to_stdout(
 841             message, skip_eol, quiet=self.params.get('quiet', False))
 842
 843     class Styles(Enum):
 844         HEADERS = 'yellow'
 845         EMPHASIS = 'blue'
 846         ID = 'green'
 847         DELIM = 'blue'
 848         ERROR = 'red'
 849         WARNING = 'yellow'
 850         SUPPRESS = 'light black'
 851
 852     def __format_text(self, out, text, f, fallback=None, *, test_encoding=False):
 853         assert out in ('screen', 'err')
 854         if test_encoding:
 855             original_text = text
 856             handle = self._screen_file if out == 'screen' else self._err_file
 857             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 858             text = text.encode(encoding, 'ignore').decode(encoding)
 859             if fallback is not None and text != original_text:
 860                 text = fallback
 861         if isinstance(f, self.Styles):
 862             f = f._value_
 863         return format_text(text, f) if self._allow_colors[out] else text if fallback is None else fallback
 864
 865     def _format_screen(self, *args, **kwargs):
 866         return self.__format_text('screen', *args, **kwargs)
 867
 868     def _format_err(self, *args, **kwargs):
 869         return self.__format_text('err', *args, **kwargs)
 870
 871     def report_warning(self, message, only_once=False):
 872         '''
 873         Print the message to stderr, it will be prefixed with 'WARNING:'
 874         If stderr is a tty file the 'WARNING:' will be colored
 875         '''
 876         if self.params.get('logger') is not None:
 877             self.params['logger'].warning(message)
 878         else:
 879             if self.params.get('no_warnings'):
 880                 return
 881             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 882
 883     def report_error(self, message, tb=None):
 884         '''
 885         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 886         in red if stderr is a tty file.
 887         '''
 888         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb)
 889
 890     def write_debug(self, message, only_once=False):
 891         '''Log debug message or Print message to stderr'''
 892         if not self.params.get('verbose', False):
 893             return
 894         message = '[debug] %s' % message
 895         if self.params.get('logger'):
 896             self.params['logger'].debug(message)
 897         else:
 898             self.to_stderr(message, only_once)
 899
 900     def report_file_already_downloaded(self, file_name):
 901         """Report file has already been fully downloaded."""
 902         try:
 903             self.to_screen('[download] %s has already been downloaded' % file_name)
 904         except UnicodeEncodeError:
 905             self.to_screen('[download] The file has already been downloaded')
 906
 907     def report_file_delete(self, file_name):
 908         """Report that existing file will be deleted."""
 909         try:
 910             self.to_screen('Deleting existing file %s' % file_name)
 911         except UnicodeEncodeError:
 912             self.to_screen('Deleting existing file')
 913
 914     def raise_no_formats(self, info, forced=False):
 915         has_drm = info.get('__has_drm')
 916         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 917         expected = self.params.get('ignore_no_formats_error')
 918         if forced or not expected:
 919             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 920                                  expected=has_drm or expected)
 921         else:
 922             self.report_warning(msg)
 923
 924     def parse_outtmpl(self):
 925         outtmpl_dict = self.params.get('outtmpl', {})
 926         if not isinstance(outtmpl_dict, dict):
 927             outtmpl_dict = {'default': outtmpl_dict}
 928         # Remove spaces in the default template
 929         if self.params.get('restrictfilenames'):
 930             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 931         else:
 932             sanitize = lambda x: x
 933         outtmpl_dict.update({
 934             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 935             if outtmpl_dict.get(k) is None})
 936         for key, val in outtmpl_dict.items():
 937             if isinstance(val, bytes):
 938                 self.report_warning(
 939                     'Parameter outtmpl is bytes, but should be a unicode string. '
 940                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 941         return outtmpl_dict
 942
 943     def get_output_path(self, dir_type='', filename=None):
 944         paths = self.params.get('paths', {})
 945         assert isinstance(paths, dict)
 946         path = os.path.join(
 947             expand_path(paths.get('home', '').strip()),
 948             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 949             filename or '')
 950
 951         # Temporary fix for #4787
 952         # 'Treat' all problem characters by passing filename through preferredencoding
 953         # to workaround encoding issues with subprocess on python2 @ Windows
 954         if sys.version_info < (3, 0) and sys.platform == 'win32':
 955             path = encodeFilename(path, True).decode(preferredencoding())
 956         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 957
 958     @staticmethod
 959     def _outtmpl_expandpath(outtmpl):
 960         # expand_path translates '%%' into '%' and '$$' into '$'
 961         # correspondingly that is not what we want since we need to keep
 962         # '%%' intact for template dict substitution step. Working around
 963         # with boundary-alike separator hack.
 964         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 965         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 966
 967         # outtmpl should be expand_path'ed before template dict substitution
 968         # because meta fields may contain env variables we don't want to
 969         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 970         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 971         return expand_path(outtmpl).replace(sep, '')
 972
 973     @staticmethod
 974     def escape_outtmpl(outtmpl):
 975         ''' Escape any remaining strings like %s, %abc% etc. '''
 976         return re.sub(
 977             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 978             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 979             outtmpl)
 980
 981     @classmethod
 982     def validate_outtmpl(cls, outtmpl):
 983         ''' @return None or Exception object '''
 984         outtmpl = re.sub(
 985             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
 986             lambda mobj: f'{mobj.group(0)[:-1]}s',
 987             cls._outtmpl_expandpath(outtmpl))
 988         try:
 989             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
 990             return None
 991         except ValueError as err:
 992             return err
 993
 994     @staticmethod
 995     def _copy_infodict(info_dict):
 996         info_dict = dict(info_dict)
 997         for key in ('__original_infodict', '__postprocessors'):
 998             info_dict.pop(key, None)
 999         return info_dict
1000
1001     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
1002         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
1003         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1004
1005         info_dict = self._copy_infodict(info_dict)
1006         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1007             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1008             if info_dict.get('duration', None) is not None
1009             else None)
1010         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
1011         if info_dict.get('resolution') is None:
1012             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1013
1014         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1015         # of %(field)s to %(field)0Nd for backward compatibility
1016         field_size_compat_map = {
1017             'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1018             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1019             'autonumber': self.params.get('autonumber_size') or 5,
1020         }
1021
1022         TMPL_DICT = {}
1023         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
1024         MATH_FUNCTIONS = {
1025             '+': float.__add__,
1026             '-': float.__sub__,
1027         }
1028         # Field is of the form key1.key2...
1029         # where keys (except first) can be string, int or slice
1030         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
1031         MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
1032         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1033         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1034             (?P<negate>-)?
1035             (?P<fields>{field})
1036             (?P<maths>(?:{math_op}{math_field})*)
1037             (?:>(?P<strf_format>.+?))?
1038             (?P<alternate>(?<!\\),[^|)]+)?
1039             (?:\|(?P<default>.*?))?
1040             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
1041
1042         def _traverse_infodict(k):
1043             k = k.split('.')
1044             if k[0] == '':
1045                 k.pop(0)
1046             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
1047
1048         def get_value(mdict):
1049             # Object traversal
1050             value = _traverse_infodict(mdict['fields'])
1051             # Negative
1052             if mdict['negate']:
1053                 value = float_or_none(value)
1054                 if value is not None:
1055                     value *= -1
1056             # Do maths
1057             offset_key = mdict['maths']
1058             if offset_key:
1059                 value = float_or_none(value)
1060                 operator = None
1061                 while offset_key:
1062                     item = re.match(
1063                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1064                         offset_key).group(0)
1065                     offset_key = offset_key[len(item):]
1066                     if operator is None:
1067                         operator = MATH_FUNCTIONS[item]
1068                         continue
1069                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1070                     offset = float_or_none(item)
1071                     if offset is None:
1072                         offset = float_or_none(_traverse_infodict(item))
1073                     try:
1074                         value = operator(value, multiplier * offset)
1075                     except (TypeError, ZeroDivisionError):
1076                         return None
1077                     operator = None
1078             # Datetime formatting
1079             if mdict['strf_format']:
1080                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1081
1082             return value
1083
1084         na = self.params.get('outtmpl_na_placeholder', 'NA')
1085
1086         def _dumpjson_default(obj):
1087             if isinstance(obj, (set, LazyList)):
1088                 return list(obj)
1089             raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
1090
1091         def create_key(outer_mobj):
1092             if not outer_mobj.group('has_key'):
1093                 return outer_mobj.group(0)
1094             key = outer_mobj.group('key')
1095             mobj = re.match(INTERNAL_FORMAT_RE, key)
1096             initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
1097             value, default = None, na
1098             while mobj:
1099                 mobj = mobj.groupdict()
1100                 default = mobj['default'] if mobj['default'] is not None else default
1101                 value = get_value(mobj)
1102                 if value is None and mobj['alternate']:
1103                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1104                 else:
1105                     break
1106
1107             fmt = outer_mobj.group('format')
1108             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1109                 fmt = '0{:d}d'.format(field_size_compat_map[key])
1110
1111             value = default if value is None else value
1112
1113             flags = outer_mobj.group('conversion') or ''
1114             str_fmt = f'{fmt[:-1]}s'
1115             if fmt[-1] == 'l':  # list
1116                 delim = '\n' if '#' in flags else ', '
1117                 value, fmt = delim.join(variadic(value)), str_fmt
1118             elif fmt[-1] == 'j':  # json
1119                 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
1120             elif fmt[-1] == 'q':  # quoted
1121                 value = map(str, variadic(value) if '#' in flags else [value])
1122                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1123             elif fmt[-1] == 'B':  # bytes
1124                 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1125                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1126             elif fmt[-1] == 'U':  # unicode normalized
1127                 value, fmt = unicodedata.normalize(
1128                     # "+" = compatibility equivalence, "#" = NFD
1129                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1130                     value), str_fmt
1131             elif fmt[-1] == 'c':
1132                 if value:
1133                     value = str(value)[0]
1134                 else:
1135                     fmt = str_fmt
1136             elif fmt[-1] not in 'rs':  # numeric
1137                 value = float_or_none(value)
1138                 if value is None:
1139                     value, fmt = default, 's'
1140
1141             if sanitize:
1142                 if fmt[-1] == 'r':
1143                     # If value is an object, sanitize might convert it to a string
1144                     # So we convert it to repr first
1145                     value, fmt = repr(value), str_fmt
1146                 if fmt[-1] in 'csr':
1147                     value = sanitize(initial_field, value)
1148
1149             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1150             TMPL_DICT[key] = value
1151             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1152
1153         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1154
1155     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1156         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1157         return self.escape_outtmpl(outtmpl) % info_dict
1158
1159     def _prepare_filename(self, info_dict, tmpl_type='default'):
1160         try:
1161             sanitize = lambda k, v: sanitize_filename(
1162                 compat_str(v),
1163                 restricted=self.params.get('restrictfilenames'),
1164                 is_id=(k == 'id' or k.endswith('_id')))
1165             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1166             filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
1167
1168             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1169             if filename and force_ext is not None:
1170                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1171
1172             # https://github.com/blackjack4494/youtube-dlc/issues/85
1173             trim_file_name = self.params.get('trim_file_name', False)
1174             if trim_file_name:
1175                 fn_groups = filename.rsplit('.')
1176                 ext = fn_groups[-1]
1177                 sub_ext = ''
1178                 if len(fn_groups) > 2:
1179                     sub_ext = fn_groups[-2]
1180                 filename = join_nonempty(fn_groups[0][:trim_file_name], sub_ext, ext, delim='.')
1181
1182             return filename
1183         except ValueError as err:
1184             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1185             return None
1186
1187     def prepare_filename(self, info_dict, dir_type='', warn=False):
1188         """Generate the output filename."""
1189
1190         filename = self._prepare_filename(info_dict, dir_type or 'default')
1191         if not filename and dir_type not in ('', 'temp'):
1192             return ''
1193
1194         if warn:
1195             if not self.params.get('paths'):
1196                 pass
1197             elif filename == '-':
1198                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1199             elif os.path.isabs(filename):
1200                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1201         if filename == '-' or not filename:
1202             return filename
1203
1204         return self.get_output_path(dir_type, filename)
1205
1206     def _match_entry(self, info_dict, incomplete=False, silent=False):
1207         """ Returns None if the file should be downloaded """
1208
1209         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1210
1211         def check_filter():
1212             if 'title' in info_dict:
1213                 # This can happen when we're just evaluating the playlist
1214                 title = info_dict['title']
1215                 matchtitle = self.params.get('matchtitle', False)
1216                 if matchtitle:
1217                     if not re.search(matchtitle, title, re.IGNORECASE):
1218                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1219                 rejecttitle = self.params.get('rejecttitle', False)
1220                 if rejecttitle:
1221                     if re.search(rejecttitle, title, re.IGNORECASE):
1222                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1223             date = info_dict.get('upload_date')
1224             if date is not None:
1225                 dateRange = self.params.get('daterange', DateRange())
1226                 if date not in dateRange:
1227                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1228             view_count = info_dict.get('view_count')
1229             if view_count is not None:
1230                 min_views = self.params.get('min_views')
1231                 if min_views is not None and view_count < min_views:
1232                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1233                 max_views = self.params.get('max_views')
1234                 if max_views is not None and view_count > max_views:
1235                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1236             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1237                 return 'Skipping "%s" because it is age restricted' % video_title
1238
1239             match_filter = self.params.get('match_filter')
1240             if match_filter is not None:
1241                 try:
1242                     ret = match_filter(info_dict, incomplete=incomplete)
1243                 except TypeError:
1244                     # For backward compatibility
1245                     ret = None if incomplete else match_filter(info_dict)
1246                 if ret is not None:
1247                     return ret
1248             return None
1249
1250         if self.in_download_archive(info_dict):
1251             reason = '%s has already been recorded in the archive' % video_title
1252             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1253         else:
1254             reason = check_filter()
1255             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1256         if reason is not None:
1257             if not silent:
1258                 self.to_screen('[download] ' + reason)
1259             if self.params.get(break_opt, False):
1260                 raise break_err()
1261         return reason
1262
1263     @staticmethod
1264     def add_extra_info(info_dict, extra_info):
1265         '''Set the keys from extra_info in info dict if they are missing'''
1266         for key, value in extra_info.items():
1267             info_dict.setdefault(key, value)
1268
1269     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1270                      process=True, force_generic_extractor=False):
1271         """
1272         Return a list with a dictionary for each video extracted.
1273
1274         Arguments:
1275         url -- URL to extract
1276
1277         Keyword arguments:
1278         download -- whether to download videos during extraction
1279         ie_key -- extractor key hint
1280         extra_info -- dictionary containing the extra values to add to each result
1281         process -- whether to resolve all unresolved references (URLs, playlist items),
1282             must be True for download to work.
1283         force_generic_extractor -- force using the generic extractor
1284         """
1285
1286         if extra_info is None:
1287             extra_info = {}
1288
1289         if not ie_key and force_generic_extractor:
1290             ie_key = 'Generic'
1291
1292         if ie_key:
1293             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1294         else:
1295             ies = self._ies
1296
1297         for ie_key, ie in ies.items():
1298             if not ie.suitable(url):
1299                 continue
1300
1301             if not ie.working():
1302                 self.report_warning('The program functionality for this site has been marked as broken, '
1303                                     'and will probably not work.')
1304
1305             temp_id = ie.get_temp_id(url)
1306             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1307                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1308                                ie_key, temp_id))
1309                 break
1310             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1311         else:
1312             self.report_error('no suitable InfoExtractor for URL %s' % url)
1313
1314     def __handle_extraction_exceptions(func):
1315         @functools.wraps(func)
1316         def wrapper(self, *args, **kwargs):
1317             try:
1318                 return func(self, *args, **kwargs)
1319             except GeoRestrictedError as e:
1320                 msg = e.msg
1321                 if e.countries:
1322                     msg += '\nThis video is available in %s.' % ', '.join(
1323                         map(ISO3166Utils.short2full, e.countries))
1324                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1325                 self.report_error(msg)
1326             except ExtractorError as e:  # An error we somewhat expected
1327                 self.report_error(compat_str(e), e.format_traceback())
1328             except ThrottledDownload as e:
1329                 self.to_stderr('\r')
1330                 self.report_warning(f'{e}; Re-extracting data')
1331                 return wrapper(self, *args, **kwargs)
1332             except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1333                 raise
1334             except Exception as e:
1335                 if self.params.get('ignoreerrors'):
1336                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1337                 else:
1338                     raise
1339         return wrapper
1340
1341     @__handle_extraction_exceptions
1342     def __extract_info(self, url, ie, download, extra_info, process):
1343         ie_result = ie.extract(url)
1344         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1345             return
1346         if isinstance(ie_result, list):
1347             # Backwards compatibility: old IE result format
1348             ie_result = {
1349                 '_type': 'compat_list',
1350                 'entries': ie_result,
1351             }
1352         if extra_info.get('original_url'):
1353             ie_result.setdefault('original_url', extra_info['original_url'])
1354         self.add_default_extra_info(ie_result, ie, url)
1355         if process:
1356             return self.process_ie_result(ie_result, download, extra_info)
1357         else:
1358             return ie_result
1359
1360     def add_default_extra_info(self, ie_result, ie, url):
1361         if url is not None:
1362             self.add_extra_info(ie_result, {
1363                 'webpage_url': url,
1364                 'original_url': url,
1365                 'webpage_url_basename': url_basename(url),
1366             })
1367         if ie is not None:
1368             self.add_extra_info(ie_result, {
1369                 'extractor': ie.IE_NAME,
1370                 'extractor_key': ie.ie_key(),
1371             })
1372
1373     def process_ie_result(self, ie_result, download=True, extra_info=None):
1374         """
1375         Take the result of the ie(may be modified) and resolve all unresolved
1376         references (URLs, playlist items).
1377
1378         It will also download the videos if 'download'.
1379         Returns the resolved ie_result.
1380         """
1381         if extra_info is None:
1382             extra_info = {}
1383         result_type = ie_result.get('_type', 'video')
1384
1385         if result_type in ('url', 'url_transparent'):
1386             ie_result['url'] = sanitize_url(ie_result['url'])
1387             if ie_result.get('original_url'):
1388                 extra_info.setdefault('original_url', ie_result['original_url'])
1389
1390             extract_flat = self.params.get('extract_flat', False)
1391             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1392                     or extract_flat is True):
1393                 info_copy = ie_result.copy()
1394                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1395                 if ie and not ie_result.get('id'):
1396                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1397                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1398                 self.add_extra_info(info_copy, extra_info)
1399                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1400                 if self.params.get('force_write_download_archive', False):
1401                     self.record_download_archive(info_copy)
1402                 return ie_result
1403
1404         if result_type == 'video':
1405             self.add_extra_info(ie_result, extra_info)
1406             ie_result = self.process_video_result(ie_result, download=download)
1407             additional_urls = (ie_result or {}).get('additional_urls')
1408             if additional_urls:
1409                 # TODO: Improve MetadataParserPP to allow setting a list
1410                 if isinstance(additional_urls, compat_str):
1411                     additional_urls = [additional_urls]
1412                 self.to_screen(
1413                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1414                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1415                 ie_result['additional_entries'] = [
1416                     self.extract_info(
1417                         url, download, extra_info,
1418                         force_generic_extractor=self.params.get('force_generic_extractor'))
1419                     for url in additional_urls
1420                 ]
1421             return ie_result
1422         elif result_type == 'url':
1423             # We have to add extra_info to the results because it may be
1424             # contained in a playlist
1425             return self.extract_info(
1426                 ie_result['url'], download,
1427                 ie_key=ie_result.get('ie_key'),
1428                 extra_info=extra_info)
1429         elif result_type == 'url_transparent':
1430             # Use the information from the embedding page
1431             info = self.extract_info(
1432                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1433                 extra_info=extra_info, download=False, process=False)
1434
1435             # extract_info may return None when ignoreerrors is enabled and
1436             # extraction failed with an error, don't crash and return early
1437             # in this case
1438             if not info:
1439                 return info
1440
1441             force_properties = dict(
1442                 (k, v) for k, v in ie_result.items() if v is not None)
1443             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1444                 if f in force_properties:
1445                     del force_properties[f]
1446             new_result = info.copy()
1447             new_result.update(force_properties)
1448
1449             # Extracted info may not be a video result (i.e.
1450             # info.get('_type', 'video') != video) but rather an url or
1451             # url_transparent. In such cases outer metadata (from ie_result)
1452             # should be propagated to inner one (info). For this to happen
1453             # _type of info should be overridden with url_transparent. This
1454             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1455             if new_result.get('_type') == 'url':
1456                 new_result['_type'] = 'url_transparent'
1457
1458             return self.process_ie_result(
1459                 new_result, download=download, extra_info=extra_info)
1460         elif result_type in ('playlist', 'multi_video'):
1461             # Protect from infinite recursion due to recursively nested playlists
1462             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1463             webpage_url = ie_result['webpage_url']
1464             if webpage_url in self._playlist_urls:
1465                 self.to_screen(
1466                     '[download] Skipping already downloaded playlist: %s'
1467                     % ie_result.get('title') or ie_result.get('id'))
1468                 return
1469
1470             self._playlist_level += 1
1471             self._playlist_urls.add(webpage_url)
1472             self._sanitize_thumbnails(ie_result)
1473             try:
1474                 return self.__process_playlist(ie_result, download)
1475             finally:
1476                 self._playlist_level -= 1
1477                 if not self._playlist_level:
1478                     self._playlist_urls.clear()
1479         elif result_type == 'compat_list':
1480             self.report_warning(
1481                 'Extractor %s returned a compat_list result. '
1482                 'It needs to be updated.' % ie_result.get('extractor'))
1483
1484             def _fixup(r):
1485                 self.add_extra_info(r, {
1486                     'extractor': ie_result['extractor'],
1487                     'webpage_url': ie_result['webpage_url'],
1488                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1489                     'extractor_key': ie_result['extractor_key'],
1490                 })
1491                 return r
1492             ie_result['entries'] = [
1493                 self.process_ie_result(_fixup(r), download, extra_info)
1494                 for r in ie_result['entries']
1495             ]
1496             return ie_result
1497         else:
1498             raise Exception('Invalid result type: %s' % result_type)
1499
1500     def _ensure_dir_exists(self, path):
1501         return make_dir(path, self.report_error)
1502
1503     def __process_playlist(self, ie_result, download):
1504         # We process each entry in the playlist
1505         playlist = ie_result.get('title') or ie_result.get('id')
1506         self.to_screen('[download] Downloading playlist: %s' % playlist)
1507
1508         if 'entries' not in ie_result:
1509             raise EntryNotInPlaylist('There are no entries')
1510
1511         MissingEntry = object()
1512         incomplete_entries = bool(ie_result.get('requested_entries'))
1513         if incomplete_entries:
1514             def fill_missing_entries(entries, indices):
1515                 ret = [MissingEntry] * max(indices)
1516                 for i, entry in zip(indices, entries):
1517                     ret[i - 1] = entry
1518                 return ret
1519             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1520
1521         playlist_results = []
1522
1523         playliststart = self.params.get('playliststart', 1)
1524         playlistend = self.params.get('playlistend')
1525         # For backwards compatibility, interpret -1 as whole list
1526         if playlistend == -1:
1527             playlistend = None
1528
1529         playlistitems_str = self.params.get('playlist_items')
1530         playlistitems = None
1531         if playlistitems_str is not None:
1532             def iter_playlistitems(format):
1533                 for string_segment in format.split(','):
1534                     if '-' in string_segment:
1535                         start, end = string_segment.split('-')
1536                         for item in range(int(start), int(end) + 1):
1537                             yield int(item)
1538                     else:
1539                         yield int(string_segment)
1540             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1541
1542         ie_entries = ie_result['entries']
1543         msg = (
1544             'Downloading %d videos' if not isinstance(ie_entries, list)
1545             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1546
1547         if isinstance(ie_entries, list):
1548             def get_entry(i):
1549                 return ie_entries[i - 1]
1550         else:
1551             if not isinstance(ie_entries, (PagedList, LazyList)):
1552                 ie_entries = LazyList(ie_entries)
1553
1554             def get_entry(i):
1555                 return YoutubeDL.__handle_extraction_exceptions(
1556                     lambda self, i: ie_entries[i - 1]
1557                 )(self, i)
1558
1559         entries = []
1560         items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1561         for i in items:
1562             if i == 0:
1563                 continue
1564             if playlistitems is None and playlistend is not None and playlistend < i:
1565                 break
1566             entry = None
1567             try:
1568                 entry = get_entry(i)
1569                 if entry is MissingEntry:
1570                     raise EntryNotInPlaylist()
1571             except (IndexError, EntryNotInPlaylist):
1572                 if incomplete_entries:
1573                     raise EntryNotInPlaylist(f'Entry {i} cannot be found')
1574                 elif not playlistitems:
1575                     break
1576             entries.append(entry)
1577             try:
1578                 if entry is not None:
1579                     self._match_entry(entry, incomplete=True, silent=True)
1580             except (ExistingVideoReached, RejectedVideoReached):
1581                 break
1582         ie_result['entries'] = entries
1583
1584         # Save playlist_index before re-ordering
1585         entries = [
1586             ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1587             for i, entry in enumerate(entries, 1)
1588             if entry is not None]
1589         n_entries = len(entries)
1590
1591         if not playlistitems and (playliststart != 1 or playlistend):
1592             playlistitems = list(range(playliststart, playliststart + n_entries))
1593         ie_result['requested_entries'] = playlistitems
1594
1595         _infojson_written = False
1596         if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
1597             ie_copy = {
1598                 'playlist': playlist,
1599                 'playlist_id': ie_result.get('id'),
1600                 'playlist_title': ie_result.get('title'),
1601                 'playlist_uploader': ie_result.get('uploader'),
1602                 'playlist_uploader_id': ie_result.get('uploader_id'),
1603                 'playlist_index': 0,
1604                 'n_entries': n_entries,
1605             }
1606             ie_copy.update(dict(ie_result))
1607
1608             _infojson_written = self._write_info_json(
1609                 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1610             if _infojson_written is None:
1611                 return
1612             if self._write_description('playlist', ie_result,
1613                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1614                 return
1615             # TODO: This should be passed to ThumbnailsConvertor if necessary
1616             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1617
1618         if self.params.get('playlistreverse', False):
1619             entries = entries[::-1]
1620         if self.params.get('playlistrandom', False):
1621             random.shuffle(entries)
1622
1623         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1624
1625         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1626         failures = 0
1627         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1628         for i, entry_tuple in enumerate(entries, 1):
1629             playlist_index, entry = entry_tuple
1630             if 'playlist-index' in self.params.get('compat_opts', []):
1631                 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1632             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1633             # This __x_forwarded_for_ip thing is a bit ugly but requires
1634             # minimal changes
1635             if x_forwarded_for:
1636                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1637             extra = {
1638                 'n_entries': n_entries,
1639                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1640                 'playlist_index': playlist_index,
1641                 'playlist_autonumber': i,
1642                 'playlist': playlist,
1643                 'playlist_id': ie_result.get('id'),
1644                 'playlist_title': ie_result.get('title'),
1645                 'playlist_uploader': ie_result.get('uploader'),
1646                 'playlist_uploader_id': ie_result.get('uploader_id'),
1647                 'extractor': ie_result['extractor'],
1648                 'webpage_url': ie_result['webpage_url'],
1649                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1650                 'extractor_key': ie_result['extractor_key'],
1651             }
1652
1653             if self._match_entry(entry, incomplete=True) is not None:
1654                 continue
1655
1656             entry_result = self.__process_iterable_entry(entry, download, extra)
1657             if not entry_result:
1658                 failures += 1
1659             if failures >= max_failures:
1660                 self.report_error(
1661                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1662                 break
1663             playlist_results.append(entry_result)
1664         ie_result['entries'] = playlist_results
1665
1666         # Write the updated info to json
1667         if _infojson_written and self._write_info_json(
1668                 'updated playlist', ie_result,
1669                 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1670             return
1671         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1672         return ie_result
1673
1674     @__handle_extraction_exceptions
1675     def __process_iterable_entry(self, entry, download, extra_info):
1676         return self.process_ie_result(
1677             entry, download=download, extra_info=extra_info)
1678
1679     def _build_format_filter(self, filter_spec):
1680         " Returns a function to filter the formats according to the filter_spec "
1681
1682         OPERATORS = {
1683             '<': operator.lt,
1684             '<=': operator.le,
1685             '>': operator.gt,
1686             '>=': operator.ge,
1687             '=': operator.eq,
1688             '!=': operator.ne,
1689         }
1690         operator_rex = re.compile(r'''(?x)\s*
1691             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1692             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1693             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1694             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1695         m = operator_rex.fullmatch(filter_spec)
1696         if m:
1697             try:
1698                 comparison_value = int(m.group('value'))
1699             except ValueError:
1700                 comparison_value = parse_filesize(m.group('value'))
1701                 if comparison_value is None:
1702                     comparison_value = parse_filesize(m.group('value') + 'B')
1703                 if comparison_value is None:
1704                     raise ValueError(
1705                         'Invalid value %r in format specification %r' % (
1706                             m.group('value'), filter_spec))
1707             op = OPERATORS[m.group('op')]
1708
1709         if not m:
1710             STR_OPERATORS = {
1711                 '=': operator.eq,
1712                 '^=': lambda attr, value: attr.startswith(value),
1713                 '$=': lambda attr, value: attr.endswith(value),
1714                 '*=': lambda attr, value: value in attr,
1715             }
1716             str_operator_rex = re.compile(r'''(?x)\s*
1717                 (?P<key>[a-zA-Z0-9._-]+)\s*
1718                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1719                 (?P<value>[a-zA-Z0-9._-]+)\s*
1720                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1721             m = str_operator_rex.fullmatch(filter_spec)
1722             if m:
1723                 comparison_value = m.group('value')
1724                 str_op = STR_OPERATORS[m.group('op')]
1725                 if m.group('negation'):
1726                     op = lambda attr, value: not str_op(attr, value)
1727                 else:
1728                     op = str_op
1729
1730         if not m:
1731             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1732
1733         def _filter(f):
1734             actual_value = f.get(m.group('key'))
1735             if actual_value is None:
1736                 return m.group('none_inclusive')
1737             return op(actual_value, comparison_value)
1738         return _filter
1739
1740     def _check_formats(self, formats):
1741         for f in formats:
1742             self.to_screen('[info] Testing format %s' % f['format_id'])
1743             path = self.get_output_path('temp')
1744             if not self._ensure_dir_exists(f'{path}/'):
1745                 continue
1746             temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1747             temp_file.close()
1748             try:
1749                 success, _ = self.dl(temp_file.name, f, test=True)
1750             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1751                 success = False
1752             finally:
1753                 if os.path.exists(temp_file.name):
1754                     try:
1755                         os.remove(temp_file.name)
1756                     except OSError:
1757                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1758             if success:
1759                 yield f
1760             else:
1761                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1762
1763     def _default_format_spec(self, info_dict, download=True):
1764
1765         def can_merge():
1766             merger = FFmpegMergerPP(self)
1767             return merger.available and merger.can_merge()
1768
1769         prefer_best = (
1770             not self.params.get('simulate')
1771             and download
1772             and (
1773                 not can_merge()
1774                 or info_dict.get('is_live', False)
1775                 or self.outtmpl_dict['default'] == '-'))
1776         compat = (
1777             prefer_best
1778             or self.params.get('allow_multiple_audio_streams', False)
1779             or 'format-spec' in self.params.get('compat_opts', []))
1780
1781         return (
1782             'best/bestvideo+bestaudio' if prefer_best
1783             else 'bestvideo*+bestaudio/best' if not compat
1784             else 'bestvideo+bestaudio/best')
1785
1786     def build_format_selector(self, format_spec):
1787         def syntax_error(note, start):
1788             message = (
1789                 'Invalid format specification: '
1790                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1791             return SyntaxError(message)
1792
1793         PICKFIRST = 'PICKFIRST'
1794         MERGE = 'MERGE'
1795         SINGLE = 'SINGLE'
1796         GROUP = 'GROUP'
1797         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1798
1799         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1800                                   'video': self.params.get('allow_multiple_video_streams', False)}
1801
1802         check_formats = self.params.get('check_formats') == 'selected'
1803
1804         def _parse_filter(tokens):
1805             filter_parts = []
1806             for type, string, start, _, _ in tokens:
1807                 if type == tokenize.OP and string == ']':
1808                     return ''.join(filter_parts)
1809                 else:
1810                     filter_parts.append(string)
1811
1812         def _remove_unused_ops(tokens):
1813             # Remove operators that we don't use and join them with the surrounding strings
1814             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1815             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1816             last_string, last_start, last_end, last_line = None, None, None, None
1817             for type, string, start, end, line in tokens:
1818                 if type == tokenize.OP and string == '[':
1819                     if last_string:
1820                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1821                         last_string = None
1822                     yield type, string, start, end, line
1823                     # everything inside brackets will be handled by _parse_filter
1824                     for type, string, start, end, line in tokens:
1825                         yield type, string, start, end, line
1826                         if type == tokenize.OP and string == ']':
1827                             break
1828                 elif type == tokenize.OP and string in ALLOWED_OPS:
1829                     if last_string:
1830                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1831                         last_string = None
1832                     yield type, string, start, end, line
1833                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1834                     if not last_string:
1835                         last_string = string
1836                         last_start = start
1837                         last_end = end
1838                     else:
1839                         last_string += string
1840             if last_string:
1841                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1842
1843         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1844             selectors = []
1845             current_selector = None
1846             for type, string, start, _, _ in tokens:
1847                 # ENCODING is only defined in python 3.x
1848                 if type == getattr(tokenize, 'ENCODING', None):
1849                     continue
1850                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1851                     current_selector = FormatSelector(SINGLE, string, [])
1852                 elif type == tokenize.OP:
1853                     if string == ')':
1854                         if not inside_group:
1855                             # ')' will be handled by the parentheses group
1856                             tokens.restore_last_token()
1857                         break
1858                     elif inside_merge and string in ['/', ',']:
1859                         tokens.restore_last_token()
1860                         break
1861                     elif inside_choice and string == ',':
1862                         tokens.restore_last_token()
1863                         break
1864                     elif string == ',':
1865                         if not current_selector:
1866                             raise syntax_error('"," must follow a format selector', start)
1867                         selectors.append(current_selector)
1868                         current_selector = None
1869                     elif string == '/':
1870                         if not current_selector:
1871                             raise syntax_error('"/" must follow a format selector', start)
1872                         first_choice = current_selector
1873                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1874                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1875                     elif string == '[':
1876                         if not current_selector:
1877                             current_selector = FormatSelector(SINGLE, 'best', [])
1878                         format_filter = _parse_filter(tokens)
1879                         current_selector.filters.append(format_filter)
1880                     elif string == '(':
1881                         if current_selector:
1882                             raise syntax_error('Unexpected "("', start)
1883                         group = _parse_format_selection(tokens, inside_group=True)
1884                         current_selector = FormatSelector(GROUP, group, [])
1885                     elif string == '+':
1886                         if not current_selector:
1887                             raise syntax_error('Unexpected "+"', start)
1888                         selector_1 = current_selector
1889                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1890                         if not selector_2:
1891                             raise syntax_error('Expected a selector', start)
1892                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1893                     else:
1894                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1895                 elif type == tokenize.ENDMARKER:
1896                     break
1897             if current_selector:
1898                 selectors.append(current_selector)
1899             return selectors
1900
1901         def _merge(formats_pair):
1902             format_1, format_2 = formats_pair
1903
1904             formats_info = []
1905             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1906             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1907
1908             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1909                 get_no_more = {'video': False, 'audio': False}
1910                 for (i, fmt_info) in enumerate(formats_info):
1911                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1912                         formats_info.pop(i)
1913                         continue
1914                     for aud_vid in ['audio', 'video']:
1915                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1916                             if get_no_more[aud_vid]:
1917                                 formats_info.pop(i)
1918                                 break
1919                             get_no_more[aud_vid] = True
1920
1921             if len(formats_info) == 1:
1922                 return formats_info[0]
1923
1924             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1925             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1926
1927             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1928             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1929
1930             output_ext = self.params.get('merge_output_format')
1931             if not output_ext:
1932                 if the_only_video:
1933                     output_ext = the_only_video['ext']
1934                 elif the_only_audio and not video_fmts:
1935                     output_ext = the_only_audio['ext']
1936                 else:
1937                     output_ext = 'mkv'
1938
1939             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
1940
1941             new_dict = {
1942                 'requested_formats': formats_info,
1943                 'format': '+'.join(filtered('format')),
1944                 'format_id': '+'.join(filtered('format_id')),
1945                 'ext': output_ext,
1946                 'protocol': '+'.join(map(determine_protocol, formats_info)),
1947                 'language': '+'.join(orderedSet(filtered('language'))) or None,
1948                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
1949                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
1950                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
1951             }
1952
1953             if the_only_video:
1954                 new_dict.update({
1955                     'width': the_only_video.get('width'),
1956                     'height': the_only_video.get('height'),
1957                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1958                     'fps': the_only_video.get('fps'),
1959                     'dynamic_range': the_only_video.get('dynamic_range'),
1960                     'vcodec': the_only_video.get('vcodec'),
1961                     'vbr': the_only_video.get('vbr'),
1962                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1963                 })
1964
1965             if the_only_audio:
1966                 new_dict.update({
1967                     'acodec': the_only_audio.get('acodec'),
1968                     'abr': the_only_audio.get('abr'),
1969                     'asr': the_only_audio.get('asr'),
1970                 })
1971
1972             return new_dict
1973
1974         def _check_formats(formats):
1975             if not check_formats:
1976                 yield from formats
1977                 return
1978             yield from self._check_formats(formats)
1979
1980         def _build_selector_function(selector):
1981             if isinstance(selector, list):  # ,
1982                 fs = [_build_selector_function(s) for s in selector]
1983
1984                 def selector_function(ctx):
1985                     for f in fs:
1986                         yield from f(ctx)
1987                 return selector_function
1988
1989             elif selector.type == GROUP:  # ()
1990                 selector_function = _build_selector_function(selector.selector)
1991
1992             elif selector.type == PICKFIRST:  # /
1993                 fs = [_build_selector_function(s) for s in selector.selector]
1994
1995                 def selector_function(ctx):
1996                     for f in fs:
1997                         picked_formats = list(f(ctx))
1998                         if picked_formats:
1999                             return picked_formats
2000                     return []
2001
2002             elif selector.type == MERGE:  # +
2003                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2004
2005                 def selector_function(ctx):
2006                     for pair in itertools.product(
2007                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
2008                         yield _merge(pair)
2009
2010             elif selector.type == SINGLE:  # atom
2011                 format_spec = selector.selector or 'best'
2012
2013                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2014                 if format_spec == 'all':
2015                     def selector_function(ctx):
2016                         yield from _check_formats(ctx['formats'][::-1])
2017                 elif format_spec == 'mergeall':
2018                     def selector_function(ctx):
2019                         formats = list(_check_formats(ctx['formats']))
2020                         if not formats:
2021                             return
2022                         merged_format = formats[-1]
2023                         for f in formats[-2::-1]:
2024                             merged_format = _merge((merged_format, f))
2025                         yield merged_format
2026
2027                 else:
2028                     format_fallback, format_reverse, format_idx = False, True, 1
2029                     mobj = re.match(
2030                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2031                         format_spec)
2032                     if mobj is not None:
2033                         format_idx = int_or_none(mobj.group('n'), default=1)
2034                         format_reverse = mobj.group('bw')[0] == 'b'
2035                         format_type = (mobj.group('type') or [None])[0]
2036                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2037                         format_modified = mobj.group('mod') is not None
2038
2039                         format_fallback = not format_type and not format_modified  # for b, w
2040                         _filter_f = (
2041                             (lambda f: f.get('%scodec' % format_type) != 'none')
2042                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2043                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2044                             if format_type  # bv, ba, wv, wa
2045                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2046                             if not format_modified  # b, w
2047                             else lambda f: True)  # b*, w*
2048                         filter_f = lambda f: _filter_f(f) and (
2049                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2050                     else:
2051                         if format_spec in self._format_selection_exts['audio']:
2052                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2053                         elif format_spec in self._format_selection_exts['video']:
2054                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2055                         elif format_spec in self._format_selection_exts['storyboards']:
2056                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2057                         else:
2058                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2059
2060                     def selector_function(ctx):
2061                         formats = list(ctx['formats'])
2062                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2063                         if format_fallback and ctx['incomplete_formats'] and not matches:
2064                             # for extractors with incomplete formats (audio only (soundcloud)
2065                             # or video only (imgur)) best/worst will fallback to
2066                             # best/worst {video,audio}-only format
2067                             matches = formats
2068                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2069                         try:
2070                             yield matches[format_idx - 1]
2071                         except IndexError:
2072                             return
2073
2074             filters = [self._build_format_filter(f) for f in selector.filters]
2075
2076             def final_selector(ctx):
2077                 ctx_copy = copy.deepcopy(ctx)
2078                 for _filter in filters:
2079                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2080                 return selector_function(ctx_copy)
2081             return final_selector
2082
2083         stream = io.BytesIO(format_spec.encode('utf-8'))
2084         try:
2085             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2086         except tokenize.TokenError:
2087             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2088
2089         class TokenIterator(object):
2090             def __init__(self, tokens):
2091                 self.tokens = tokens
2092                 self.counter = 0
2093
2094             def __iter__(self):
2095                 return self
2096
2097             def __next__(self):
2098                 if self.counter >= len(self.tokens):
2099                     raise StopIteration()
2100                 value = self.tokens[self.counter]
2101                 self.counter += 1
2102                 return value
2103
2104             next = __next__
2105
2106             def restore_last_token(self):
2107                 self.counter -= 1
2108
2109         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2110         return _build_selector_function(parsed_selector)
2111
2112     def _calc_headers(self, info_dict):
2113         res = std_headers.copy()
2114
2115         add_headers = info_dict.get('http_headers')
2116         if add_headers:
2117             res.update(add_headers)
2118
2119         cookies = self._calc_cookies(info_dict)
2120         if cookies:
2121             res['Cookie'] = cookies
2122
2123         if 'X-Forwarded-For' not in res:
2124             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2125             if x_forwarded_for_ip:
2126                 res['X-Forwarded-For'] = x_forwarded_for_ip
2127
2128         return res
2129
2130     def _calc_cookies(self, info_dict):
2131         pr = sanitized_Request(info_dict['url'])
2132         self.cookiejar.add_cookie_header(pr)
2133         return pr.get_header('Cookie')
2134
2135     def _sort_thumbnails(self, thumbnails):
2136         thumbnails.sort(key=lambda t: (
2137             t.get('preference') if t.get('preference') is not None else -1,
2138             t.get('width') if t.get('width') is not None else -1,
2139             t.get('height') if t.get('height') is not None else -1,
2140             t.get('id') if t.get('id') is not None else '',
2141             t.get('url')))
2142
2143     def _sanitize_thumbnails(self, info_dict):
2144         thumbnails = info_dict.get('thumbnails')
2145         if thumbnails is None:
2146             thumbnail = info_dict.get('thumbnail')
2147             if thumbnail:
2148                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2149         if not thumbnails:
2150             return
2151
2152         def check_thumbnails(thumbnails):
2153             for t in thumbnails:
2154                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2155                 try:
2156                     self.urlopen(HEADRequest(t['url']))
2157                 except network_exceptions as err:
2158                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2159                     continue
2160                 yield t
2161
2162         self._sort_thumbnails(thumbnails)
2163         for i, t in enumerate(thumbnails):
2164             if t.get('id') is None:
2165                 t['id'] = '%d' % i
2166             if t.get('width') and t.get('height'):
2167                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2168             t['url'] = sanitize_url(t['url'])
2169
2170         if self.params.get('check_formats') is True:
2171             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2172         else:
2173             info_dict['thumbnails'] = thumbnails
2174
2175     def process_video_result(self, info_dict, download=True):
2176         assert info_dict.get('_type', 'video') == 'video'
2177
2178         if 'id' not in info_dict:
2179             raise ExtractorError('Missing "id" field in extractor result')
2180         if 'title' not in info_dict:
2181             raise ExtractorError('Missing "title" field in extractor result',
2182                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2183
2184         def report_force_conversion(field, field_not, conversion):
2185             self.report_warning(
2186                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2187                 % (field, field_not, conversion))
2188
2189         def sanitize_string_field(info, string_field):
2190             field = info.get(string_field)
2191             if field is None or isinstance(field, compat_str):
2192                 return
2193             report_force_conversion(string_field, 'a string', 'string')
2194             info[string_field] = compat_str(field)
2195
2196         def sanitize_numeric_fields(info):
2197             for numeric_field in self._NUMERIC_FIELDS:
2198                 field = info.get(numeric_field)
2199                 if field is None or isinstance(field, compat_numeric_types):
2200                     continue
2201                 report_force_conversion(numeric_field, 'numeric', 'int')
2202                 info[numeric_field] = int_or_none(field)
2203
2204         sanitize_string_field(info_dict, 'id')
2205         sanitize_numeric_fields(info_dict)
2206
2207         if 'playlist' not in info_dict:
2208             # It isn't part of a playlist
2209             info_dict['playlist'] = None
2210             info_dict['playlist_index'] = None
2211
2212         self._sanitize_thumbnails(info_dict)
2213
2214         thumbnail = info_dict.get('thumbnail')
2215         thumbnails = info_dict.get('thumbnails')
2216         if thumbnail:
2217             info_dict['thumbnail'] = sanitize_url(thumbnail)
2218         elif thumbnails:
2219             info_dict['thumbnail'] = thumbnails[-1]['url']
2220
2221         if info_dict.get('display_id') is None and 'id' in info_dict:
2222             info_dict['display_id'] = info_dict['id']
2223
2224         if info_dict.get('duration') is not None:
2225             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2226
2227         for ts_key, date_key in (
2228                 ('timestamp', 'upload_date'),
2229                 ('release_timestamp', 'release_date'),
2230         ):
2231             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2232                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2233                 # see http://bugs.python.org/issue1646728)
2234                 try:
2235                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2236                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2237                 except (ValueError, OverflowError, OSError):
2238                     pass
2239
2240         live_keys = ('is_live', 'was_live')
2241         live_status = info_dict.get('live_status')
2242         if live_status is None:
2243             for key in live_keys:
2244                 if info_dict.get(key) is False:
2245                     continue
2246                 if info_dict.get(key):
2247                     live_status = key
2248                 break
2249             if all(info_dict.get(key) is False for key in live_keys):
2250                 live_status = 'not_live'
2251         if live_status:
2252             info_dict['live_status'] = live_status
2253             for key in live_keys:
2254                 if info_dict.get(key) is None:
2255                     info_dict[key] = (live_status == key)
2256
2257         # Auto generate title fields corresponding to the *_number fields when missing
2258         # in order to always have clean titles. This is very common for TV series.
2259         for field in ('chapter', 'season', 'episode'):
2260             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2261                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2262
2263         for cc_kind in ('subtitles', 'automatic_captions'):
2264             cc = info_dict.get(cc_kind)
2265             if cc:
2266                 for _, subtitle in cc.items():
2267                     for subtitle_format in subtitle:
2268                         if subtitle_format.get('url'):
2269                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2270                         if subtitle_format.get('ext') is None:
2271                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2272
2273         automatic_captions = info_dict.get('automatic_captions')
2274         subtitles = info_dict.get('subtitles')
2275
2276         info_dict['requested_subtitles'] = self.process_subtitles(
2277             info_dict['id'], subtitles, automatic_captions)
2278
2279         if info_dict.get('formats') is None:
2280             # There's only one format available
2281             formats = [info_dict]
2282         else:
2283             formats = info_dict['formats']
2284
2285         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2286         if not self.params.get('allow_unplayable_formats'):
2287             formats = [f for f in formats if not f.get('has_drm')]
2288
2289         if not formats:
2290             self.raise_no_formats(info_dict)
2291
2292         def is_wellformed(f):
2293             url = f.get('url')
2294             if not url:
2295                 self.report_warning(
2296                     '"url" field is missing or empty - skipping format, '
2297                     'there is an error in extractor')
2298                 return False
2299             if isinstance(url, bytes):
2300                 sanitize_string_field(f, 'url')
2301             return True
2302
2303         # Filter out malformed formats for better extraction robustness
2304         formats = list(filter(is_wellformed, formats))
2305
2306         formats_dict = {}
2307
2308         # We check that all the formats have the format and format_id fields
2309         for i, format in enumerate(formats):
2310             sanitize_string_field(format, 'format_id')
2311             sanitize_numeric_fields(format)
2312             format['url'] = sanitize_url(format['url'])
2313             if not format.get('format_id'):
2314                 format['format_id'] = compat_str(i)
2315             else:
2316                 # Sanitize format_id from characters used in format selector expression
2317                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2318             format_id = format['format_id']
2319             if format_id not in formats_dict:
2320                 formats_dict[format_id] = []
2321             formats_dict[format_id].append(format)
2322
2323         # Make sure all formats have unique format_id
2324         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2325         for format_id, ambiguous_formats in formats_dict.items():
2326             ambigious_id = len(ambiguous_formats) > 1
2327             for i, format in enumerate(ambiguous_formats):
2328                 if ambigious_id:
2329                     format['format_id'] = '%s-%d' % (format_id, i)
2330                 if format.get('ext') is None:
2331                     format['ext'] = determine_ext(format['url']).lower()
2332                 # Ensure there is no conflict between id and ext in format selection
2333                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2334                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2335                     format['format_id'] = 'f%s' % format['format_id']
2336
2337         for i, format in enumerate(formats):
2338             if format.get('format') is None:
2339                 format['format'] = '{id} - {res}{note}'.format(
2340                     id=format['format_id'],
2341                     res=self.format_resolution(format),
2342                     note=format_field(format, 'format_note', ' (%s)'),
2343                 )
2344             if format.get('protocol') is None:
2345                 format['protocol'] = determine_protocol(format)
2346             if format.get('resolution') is None:
2347                 format['resolution'] = self.format_resolution(format, default=None)
2348             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2349                 format['dynamic_range'] = 'SDR'
2350             if (info_dict.get('duration') and format.get('tbr')
2351                     and not format.get('filesize') and not format.get('filesize_approx')):
2352                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2353
2354             # Add HTTP headers, so that external programs can use them from the
2355             # json output
2356             full_format_info = info_dict.copy()
2357             full_format_info.update(format)
2358             format['http_headers'] = self._calc_headers(full_format_info)
2359         # Remove private housekeeping stuff
2360         if '__x_forwarded_for_ip' in info_dict:
2361             del info_dict['__x_forwarded_for_ip']
2362
2363         # TODO Central sorting goes here
2364
2365         if self.params.get('check_formats') is True:
2366             formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2367
2368         if not formats or formats[0] is not info_dict:
2369             # only set the 'formats' fields if the original info_dict list them
2370             # otherwise we end up with a circular reference, the first (and unique)
2371             # element in the 'formats' field in info_dict is info_dict itself,
2372             # which can't be exported to json
2373             info_dict['formats'] = formats
2374
2375         info_dict, _ = self.pre_process(info_dict)
2376
2377         # The pre-processors may have modified the formats
2378         formats = info_dict.get('formats', [info_dict])
2379
2380         if self.params.get('list_thumbnails'):
2381             self.list_thumbnails(info_dict)
2382         if self.params.get('listformats'):
2383             if not info_dict.get('formats') and not info_dict.get('url'):
2384                 self.to_screen('%s has no formats' % info_dict['id'])
2385             else:
2386                 self.list_formats(info_dict)
2387         if self.params.get('listsubtitles'):
2388             if 'automatic_captions' in info_dict:
2389                 self.list_subtitles(
2390                     info_dict['id'], automatic_captions, 'automatic captions')
2391             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2392         list_only = self.params.get('simulate') is None and (
2393             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2394         if list_only:
2395             # Without this printing, -F --print-json will not work
2396             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2397             return
2398
2399         format_selector = self.format_selector
2400         if format_selector is None:
2401             req_format = self._default_format_spec(info_dict, download=download)
2402             self.write_debug('Default format spec: %s' % req_format)
2403             format_selector = self.build_format_selector(req_format)
2404
2405         # While in format selection we may need to have an access to the original
2406         # format set in order to calculate some metrics or do some processing.
2407         # For now we need to be able to guess whether original formats provided
2408         # by extractor are incomplete or not (i.e. whether extractor provides only
2409         # video-only or audio-only formats) for proper formats selection for
2410         # extractors with such incomplete formats (see
2411         # https://github.com/ytdl-org/youtube-dl/pull/5556).
2412         # Since formats may be filtered during format selection and may not match
2413         # the original formats the results may be incorrect. Thus original formats
2414         # or pre-calculated metrics should be passed to format selection routines
2415         # as well.
2416         # We will pass a context object containing all necessary additional data
2417         # instead of just formats.
2418         # This fixes incorrect format selection issue (see
2419         # https://github.com/ytdl-org/youtube-dl/issues/10083).
2420         incomplete_formats = (
2421             # All formats are video-only or
2422             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2423             # all formats are audio-only
2424             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2425
2426         ctx = {
2427             'formats': formats,
2428             'incomplete_formats': incomplete_formats,
2429         }
2430
2431         formats_to_download = list(format_selector(ctx))
2432         if not formats_to_download:
2433             if not self.params.get('ignore_no_formats_error'):
2434                 raise ExtractorError('Requested format is not available', expected=True,
2435                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2436             else:
2437                 self.report_warning('Requested format is not available')
2438                 # Process what we can, even without any available formats.
2439                 self.process_info(dict(info_dict))
2440         elif download:
2441             self.to_screen(
2442                 '[info] %s: Downloading %d format(s): %s' % (
2443                     info_dict['id'], len(formats_to_download),
2444                     ", ".join([f['format_id'] for f in formats_to_download])))
2445             for fmt in formats_to_download:
2446                 new_info = dict(info_dict)
2447                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2448                 new_info['__original_infodict'] = info_dict
2449                 new_info.update(fmt)
2450                 self.process_info(new_info)
2451         # We update the info dict with the selected best quality format (backwards compatibility)
2452         if formats_to_download:
2453             info_dict.update(formats_to_download[-1])
2454         return info_dict
2455
2456     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2457         """Select the requested subtitles and their format"""
2458         available_subs = {}
2459         if normal_subtitles and self.params.get('writesubtitles'):
2460             available_subs.update(normal_subtitles)
2461         if automatic_captions and self.params.get('writeautomaticsub'):
2462             for lang, cap_info in automatic_captions.items():
2463                 if lang not in available_subs:
2464                     available_subs[lang] = cap_info
2465
2466         if (not self.params.get('writesubtitles') and not
2467                 self.params.get('writeautomaticsub') or not
2468                 available_subs):
2469             return None
2470
2471         all_sub_langs = available_subs.keys()
2472         if self.params.get('allsubtitles', False):
2473             requested_langs = all_sub_langs
2474         elif self.params.get('subtitleslangs', False):
2475             # A list is used so that the order of languages will be the same as
2476             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2477             requested_langs = []
2478             for lang_re in self.params.get('subtitleslangs'):
2479                 if lang_re == 'all':
2480                     requested_langs.extend(all_sub_langs)
2481                     continue
2482                 discard = lang_re[0] == '-'
2483                 if discard:
2484                     lang_re = lang_re[1:]
2485                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2486                 if discard:
2487                     for lang in current_langs:
2488                         while lang in requested_langs:
2489                             requested_langs.remove(lang)
2490                 else:
2491                     requested_langs.extend(current_langs)
2492             requested_langs = orderedSet(requested_langs)
2493         elif 'en' in available_subs:
2494             requested_langs = ['en']
2495         else:
2496             requested_langs = [list(all_sub_langs)[0]]
2497         if requested_langs:
2498             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2499
2500         formats_query = self.params.get('subtitlesformat', 'best')
2501         formats_preference = formats_query.split('/') if formats_query else []
2502         subs = {}
2503         for lang in requested_langs:
2504             formats = available_subs.get(lang)
2505             if formats is None:
2506                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2507                 continue
2508             for ext in formats_preference:
2509                 if ext == 'best':
2510                     f = formats[-1]
2511                     break
2512                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2513                 if matches:
2514                     f = matches[-1]
2515                     break
2516             else:
2517                 f = formats[-1]
2518                 self.report_warning(
2519                     'No subtitle format found matching "%s" for language %s, '
2520                     'using %s' % (formats_query, lang, f['ext']))
2521             subs[lang] = f
2522         return subs
2523
2524     def __forced_printings(self, info_dict, filename, incomplete):
2525         def print_mandatory(field, actual_field=None):
2526             if actual_field is None:
2527                 actual_field = field
2528             if (self.params.get('force%s' % field, False)
2529                     and (not incomplete or info_dict.get(actual_field) is not None)):
2530                 self.to_stdout(info_dict[actual_field])
2531
2532         def print_optional(field):
2533             if (self.params.get('force%s' % field, False)
2534                     and info_dict.get(field) is not None):
2535                 self.to_stdout(info_dict[field])
2536
2537         info_dict = info_dict.copy()
2538         if filename is not None:
2539             info_dict['filename'] = filename
2540         if info_dict.get('requested_formats') is not None:
2541             # For RTMP URLs, also include the playpath
2542             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2543         elif 'url' in info_dict:
2544             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2545
2546         if self.params.get('forceprint') or self.params.get('forcejson'):
2547             self.post_extract(info_dict)
2548         for tmpl in self.params.get('forceprint', []):
2549             mobj = re.match(r'\w+(=?)$', tmpl)
2550             if mobj and mobj.group(1):
2551                 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2552             elif mobj:
2553                 tmpl = '%({})s'.format(tmpl)
2554             self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2555
2556         print_mandatory('title')
2557         print_mandatory('id')
2558         print_mandatory('url', 'urls')
2559         print_optional('thumbnail')
2560         print_optional('description')
2561         print_optional('filename')
2562         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2563             self.to_stdout(formatSeconds(info_dict['duration']))
2564         print_mandatory('format')
2565
2566         if self.params.get('forcejson'):
2567             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2568
2569     def dl(self, name, info, subtitle=False, test=False):
2570         if not info.get('url'):
2571             self.raise_no_formats(info, True)
2572
2573         if test:
2574             verbose = self.params.get('verbose')
2575             params = {
2576                 'test': True,
2577                 'quiet': self.params.get('quiet') or not verbose,
2578                 'verbose': verbose,
2579                 'noprogress': not verbose,
2580                 'nopart': True,
2581                 'skip_unavailable_fragments': False,
2582                 'keep_fragments': False,
2583                 'overwrites': True,
2584                 '_no_ytdl_file': True,
2585             }
2586         else:
2587             params = self.params
2588         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2589         if not test:
2590             for ph in self._progress_hooks:
2591                 fd.add_progress_hook(ph)
2592             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2593             self.write_debug('Invoking downloader on "%s"' % urls)
2594
2595         new_info = copy.deepcopy(self._copy_infodict(info))
2596         if new_info.get('http_headers') is None:
2597             new_info['http_headers'] = self._calc_headers(new_info)
2598         return fd.download(name, new_info, subtitle)
2599
2600     def process_info(self, info_dict):
2601         """Process a single resolved IE result."""
2602
2603         assert info_dict.get('_type', 'video') == 'video'
2604
2605         max_downloads = self.params.get('max_downloads')
2606         if max_downloads is not None:
2607             if self._num_downloads >= int(max_downloads):
2608                 raise MaxDownloadsReached()
2609
2610         # TODO: backward compatibility, to be removed
2611         info_dict['fulltitle'] = info_dict['title']
2612
2613         if 'format' not in info_dict and 'ext' in info_dict:
2614             info_dict['format'] = info_dict['ext']
2615
2616         if self._match_entry(info_dict) is not None:
2617             return
2618
2619         self.post_extract(info_dict)
2620         self._num_downloads += 1
2621
2622         # info_dict['_filename'] needs to be set for backward compatibility
2623         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2624         temp_filename = self.prepare_filename(info_dict, 'temp')
2625         files_to_move = {}
2626
2627         # Forced printings
2628         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2629
2630         if self.params.get('simulate'):
2631             if self.params.get('force_write_download_archive', False):
2632                 self.record_download_archive(info_dict)
2633             # Do nothing else if in simulate mode
2634             return
2635
2636         if full_filename is None:
2637             return
2638         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2639             return
2640         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2641             return
2642
2643         if self._write_description('video', info_dict,
2644                                    self.prepare_filename(info_dict, 'description')) is None:
2645             return
2646
2647         sub_files = self._write_subtitles(info_dict, temp_filename)
2648         if sub_files is None:
2649             return
2650         files_to_move.update(dict(sub_files))
2651
2652         thumb_files = self._write_thumbnails(
2653             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2654         if thumb_files is None:
2655             return
2656         files_to_move.update(dict(thumb_files))
2657
2658         infofn = self.prepare_filename(info_dict, 'infojson')
2659         _infojson_written = self._write_info_json('video', info_dict, infofn)
2660         if _infojson_written:
2661             info_dict['infojson_filename'] = infofn
2662             # For backward compatability, even though it was a private field
2663             info_dict['__infojson_filename'] = infofn
2664         elif _infojson_written is None:
2665             return
2666
2667         # Note: Annotations are deprecated
2668         annofn = None
2669         if self.params.get('writeannotations', False):
2670             annofn = self.prepare_filename(info_dict, 'annotation')
2671         if annofn:
2672             if not self._ensure_dir_exists(encodeFilename(annofn)):
2673                 return
2674             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2675                 self.to_screen('[info] Video annotations are already present')
2676             elif not info_dict.get('annotations'):
2677                 self.report_warning('There are no annotations to write.')
2678             else:
2679                 try:
2680                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2681                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2682                         annofile.write(info_dict['annotations'])
2683                 except (KeyError, TypeError):
2684                     self.report_warning('There are no annotations to write.')
2685                 except (OSError, IOError):
2686                     self.report_error('Cannot write annotations file: ' + annofn)
2687                     return
2688
2689         # Write internet shortcut files
2690         def _write_link_file(link_type):
2691             if 'webpage_url' not in info_dict:
2692                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2693                 return False
2694             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2695             if not self._ensure_dir_exists(encodeFilename(linkfn)):
2696                 return False
2697             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2698                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2699                 return True
2700             try:
2701                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2702                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2703                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2704                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2705                     if link_type == 'desktop':
2706                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2707                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2708             except (OSError, IOError):
2709                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2710                 return False
2711             return True
2712
2713         write_links = {
2714             'url': self.params.get('writeurllink'),
2715             'webloc': self.params.get('writewebloclink'),
2716             'desktop': self.params.get('writedesktoplink'),
2717         }
2718         if self.params.get('writelink'):
2719             link_type = ('webloc' if sys.platform == 'darwin'
2720                          else 'desktop' if sys.platform.startswith('linux')
2721                          else 'url')
2722             write_links[link_type] = True
2723
2724         if any(should_write and not _write_link_file(link_type)
2725                for link_type, should_write in write_links.items()):
2726             return
2727
2728         try:
2729             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2730         except PostProcessingError as err:
2731             self.report_error('Preprocessing: %s' % str(err))
2732             return
2733
2734         must_record_download_archive = False
2735         if self.params.get('skip_download', False):
2736             info_dict['filepath'] = temp_filename
2737             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2738             info_dict['__files_to_move'] = files_to_move
2739             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2740         else:
2741             # Download
2742             info_dict.setdefault('__postprocessors', [])
2743             try:
2744
2745                 def existing_file(*filepaths):
2746                     ext = info_dict.get('ext')
2747                     final_ext = self.params.get('final_ext', ext)
2748                     existing_files = []
2749                     for file in orderedSet(filepaths):
2750                         if final_ext != ext:
2751                             converted = replace_extension(file, final_ext, ext)
2752                             if os.path.exists(encodeFilename(converted)):
2753                                 existing_files.append(converted)
2754                         if os.path.exists(encodeFilename(file)):
2755                             existing_files.append(file)
2756
2757                     if not existing_files or self.params.get('overwrites', False):
2758                         for file in orderedSet(existing_files):
2759                             self.report_file_delete(file)
2760                             os.remove(encodeFilename(file))
2761                         return None
2762
2763                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2764                     return existing_files[0]
2765
2766                 success = True
2767                 if info_dict.get('requested_formats') is not None:
2768
2769                     def compatible_formats(formats):
2770                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2771                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2772                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2773                         if len(video_formats) > 2 or len(audio_formats) > 2:
2774                             return False
2775
2776                         # Check extension
2777                         exts = set(format.get('ext') for format in formats)
2778                         COMPATIBLE_EXTS = (
2779                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2780                             set(('webm',)),
2781                         )
2782                         for ext_sets in COMPATIBLE_EXTS:
2783                             if ext_sets.issuperset(exts):
2784                                 return True
2785                         # TODO: Check acodec/vcodec
2786                         return False
2787
2788                     requested_formats = info_dict['requested_formats']
2789                     old_ext = info_dict['ext']
2790                     if self.params.get('merge_output_format') is None:
2791                         if not compatible_formats(requested_formats):
2792                             info_dict['ext'] = 'mkv'
2793                             self.report_warning(
2794                                 'Requested formats are incompatible for merge and will be merged into mkv')
2795                         if (info_dict['ext'] == 'webm'
2796                                 and info_dict.get('thumbnails')
2797                                 # check with type instead of pp_key, __name__, or isinstance
2798                                 # since we dont want any custom PPs to trigger this
2799                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2800                             info_dict['ext'] = 'mkv'
2801                             self.report_warning(
2802                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2803                     new_ext = info_dict['ext']
2804
2805                     def correct_ext(filename, ext=new_ext):
2806                         if filename == '-':
2807                             return filename
2808                         filename_real_ext = os.path.splitext(filename)[1][1:]
2809                         filename_wo_ext = (
2810                             os.path.splitext(filename)[0]
2811                             if filename_real_ext in (old_ext, new_ext)
2812                             else filename)
2813                         return '%s.%s' % (filename_wo_ext, ext)
2814
2815                     # Ensure filename always has a correct extension for successful merge
2816                     full_filename = correct_ext(full_filename)
2817                     temp_filename = correct_ext(temp_filename)
2818                     dl_filename = existing_file(full_filename, temp_filename)
2819                     info_dict['__real_download'] = False
2820
2821                     if dl_filename is not None:
2822                         self.report_file_already_downloaded(dl_filename)
2823                     elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
2824                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2825                         success, real_download = self.dl(temp_filename, info_dict)
2826                         info_dict['__real_download'] = real_download
2827                     else:
2828                         downloaded = []
2829                         merger = FFmpegMergerPP(self)
2830                         if self.params.get('allow_unplayable_formats'):
2831                             self.report_warning(
2832                                 'You have requested merging of multiple formats '
2833                                 'while also allowing unplayable formats to be downloaded. '
2834                                 'The formats won\'t be merged to prevent data corruption.')
2835                         elif not merger.available:
2836                             self.report_warning(
2837                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2838                                 'The formats won\'t be merged.')
2839
2840                         if temp_filename == '-':
2841                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
2842                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2843                                       else 'but ffmpeg is not installed')
2844                             self.report_warning(
2845                                 f'You have requested downloading multiple formats to stdout {reason}. '
2846                                 'The formats will be streamed one after the other')
2847                             fname = temp_filename
2848                         for f in requested_formats:
2849                             new_info = dict(info_dict)
2850                             del new_info['requested_formats']
2851                             new_info.update(f)
2852                             if temp_filename != '-':
2853                                 fname = prepend_extension(
2854                                     correct_ext(temp_filename, new_info['ext']),
2855                                     'f%s' % f['format_id'], new_info['ext'])
2856                                 if not self._ensure_dir_exists(fname):
2857                                     return
2858                                 f['filepath'] = fname
2859                                 downloaded.append(fname)
2860                             partial_success, real_download = self.dl(fname, new_info)
2861                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2862                             success = success and partial_success
2863                         if merger.available and not self.params.get('allow_unplayable_formats'):
2864                             info_dict['__postprocessors'].append(merger)
2865                             info_dict['__files_to_merge'] = downloaded
2866                             # Even if there were no downloads, it is being merged only now
2867                             info_dict['__real_download'] = True
2868                         else:
2869                             for file in downloaded:
2870                                 files_to_move[file] = None
2871                 else:
2872                     # Just a single file
2873                     dl_filename = existing_file(full_filename, temp_filename)
2874                     if dl_filename is None or dl_filename == temp_filename:
2875                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2876                         # So we should try to resume the download
2877                         success, real_download = self.dl(temp_filename, info_dict)
2878                         info_dict['__real_download'] = real_download
2879                     else:
2880                         self.report_file_already_downloaded(dl_filename)
2881
2882                 dl_filename = dl_filename or temp_filename
2883                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2884
2885             except network_exceptions as err:
2886                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2887                 return
2888             except (OSError, IOError) as err:
2889                 raise UnavailableVideoError(err)
2890             except (ContentTooShortError, ) as err:
2891                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2892                 return
2893
2894             if success and full_filename != '-':
2895
2896                 def fixup():
2897                     do_fixup = True
2898                     fixup_policy = self.params.get('fixup')
2899                     vid = info_dict['id']
2900
2901                     if fixup_policy in ('ignore', 'never'):
2902                         return
2903                     elif fixup_policy == 'warn':
2904                         do_fixup = False
2905                     elif fixup_policy != 'force':
2906                         assert fixup_policy in ('detect_or_warn', None)
2907                         if not info_dict.get('__real_download'):
2908                             do_fixup = False
2909
2910                     def ffmpeg_fixup(cndn, msg, cls):
2911                         if not cndn:
2912                             return
2913                         if not do_fixup:
2914                             self.report_warning(f'{vid}: {msg}')
2915                             return
2916                         pp = cls(self)
2917                         if pp.available:
2918                             info_dict['__postprocessors'].append(pp)
2919                         else:
2920                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2921
2922                     stretched_ratio = info_dict.get('stretched_ratio')
2923                     ffmpeg_fixup(
2924                         stretched_ratio not in (1, None),
2925                         f'Non-uniform pixel ratio {stretched_ratio}',
2926                         FFmpegFixupStretchedPP)
2927
2928                     ffmpeg_fixup(
2929                         (info_dict.get('requested_formats') is None
2930                          and info_dict.get('container') == 'm4a_dash'
2931                          and info_dict.get('ext') == 'm4a'),
2932                         'writing DASH m4a. Only some players support this container',
2933                         FFmpegFixupM4aPP)
2934
2935                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
2936                     downloader = downloader.__name__ if downloader else None
2937                     ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
2938                                  'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
2939                                  FFmpegFixupM3u8PP)
2940                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
2941                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
2942
2943                 fixup()
2944                 try:
2945                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2946                 except PostProcessingError as err:
2947                     self.report_error('Postprocessing: %s' % str(err))
2948                     return
2949                 try:
2950                     for ph in self._post_hooks:
2951                         ph(info_dict['filepath'])
2952                 except Exception as err:
2953                     self.report_error('post hooks: %s' % str(err))
2954                     return
2955                 must_record_download_archive = True
2956
2957         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2958             self.record_download_archive(info_dict)
2959         max_downloads = self.params.get('max_downloads')
2960         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2961             raise MaxDownloadsReached()
2962
2963     def __download_wrapper(self, func):
2964         @functools.wraps(func)
2965         def wrapper(*args, **kwargs):
2966             try:
2967                 res = func(*args, **kwargs)
2968             except UnavailableVideoError as e:
2969                 self.report_error(e)
2970             except DownloadCancelled as e:
2971                 self.to_screen(f'[info] {e}')
2972                 raise
2973             else:
2974                 if self.params.get('dump_single_json', False):
2975                     self.post_extract(res)
2976                     self.to_stdout(json.dumps(self.sanitize_info(res)))
2977         return wrapper
2978
2979     def download(self, url_list):
2980         """Download a given list of URLs."""
2981         url_list = variadic(url_list)  # Passing a single URL is a common mistake
2982         outtmpl = self.outtmpl_dict['default']
2983         if (len(url_list) > 1
2984                 and outtmpl != '-'
2985                 and '%' not in outtmpl
2986                 and self.params.get('max_downloads') != 1):
2987             raise SameFileError(outtmpl)
2988
2989         for url in url_list:
2990             self.__download_wrapper(self.extract_info)(
2991                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2992
2993         return self._download_retcode
2994
2995     def download_with_info_file(self, info_filename):
2996         with contextlib.closing(fileinput.FileInput(
2997                 [info_filename], mode='r',
2998                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2999             # FileInput doesn't have a read method, we can't call json.load
3000             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3001         try:
3002             self.__download_wrapper(self.process_ie_result)(info, download=True)
3003         except (DownloadError, EntryNotInPlaylist, ThrottledDownload) as e:
3004             if not isinstance(e, EntryNotInPlaylist):
3005                 self.to_stderr('\r')
3006             webpage_url = info.get('webpage_url')
3007             if webpage_url is not None:
3008                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3009                 return self.download([webpage_url])
3010             else:
3011                 raise
3012         return self._download_retcode
3013
3014     @staticmethod
3015     def sanitize_info(info_dict, remove_private_keys=False):
3016         ''' Sanitize the infodict for converting to json '''
3017         if info_dict is None:
3018             return info_dict
3019         info_dict.setdefault('epoch', int(time.time()))
3020         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3021         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3022         if remove_private_keys:
3023             remove_keys |= {
3024                 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
3025                 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3026             }
3027             empty_values = (None, {}, [], set(), tuple())
3028             reject = lambda k, v: k not in keep_keys and (
3029                 k.startswith('_') or k in remove_keys or v in empty_values)
3030         else:
3031             reject = lambda k, v: k in remove_keys
3032         filter_fn = lambda obj: (
3033             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
3034             else obj if not isinstance(obj, dict)
3035             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
3036         return filter_fn(info_dict)
3037
3038     @staticmethod
3039     def filter_requested_info(info_dict, actually_filter=True):
3040         ''' Alias of sanitize_info for backward compatibility '''
3041         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3042
3043     def run_pp(self, pp, infodict):
3044         files_to_delete = []
3045         if '__files_to_move' not in infodict:
3046             infodict['__files_to_move'] = {}
3047         try:
3048             files_to_delete, infodict = pp.run(infodict)
3049         except PostProcessingError as e:
3050             # Must be True and not 'only_download'
3051             if self.params.get('ignoreerrors') is True:
3052                 self.report_error(e)
3053                 return infodict
3054             raise
3055
3056         if not files_to_delete:
3057             return infodict
3058         if self.params.get('keepvideo', False):
3059             for f in files_to_delete:
3060                 infodict['__files_to_move'].setdefault(f, '')
3061         else:
3062             for old_filename in set(files_to_delete):
3063                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3064                 try:
3065                     os.remove(encodeFilename(old_filename))
3066                 except (IOError, OSError):
3067                     self.report_warning('Unable to remove downloaded original file')
3068                 if old_filename in infodict['__files_to_move']:
3069                     del infodict['__files_to_move'][old_filename]
3070         return infodict
3071
3072     @staticmethod
3073     def post_extract(info_dict):
3074         def actual_post_extract(info_dict):
3075             if info_dict.get('_type') in ('playlist', 'multi_video'):
3076                 for video_dict in info_dict.get('entries', {}):
3077                     actual_post_extract(video_dict or {})
3078                 return
3079
3080             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3081             extra = post_extractor().items()
3082             info_dict.update(extra)
3083             info_dict.pop('__post_extractor', None)
3084
3085             original_infodict = info_dict.get('__original_infodict') or {}
3086             original_infodict.update(extra)
3087             original_infodict.pop('__post_extractor', None)
3088
3089         actual_post_extract(info_dict or {})
3090
3091     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3092         info = dict(ie_info)
3093         info['__files_to_move'] = files_to_move or {}
3094         for pp in self._pps[key]:
3095             info = self.run_pp(pp, info)
3096         return info, info.pop('__files_to_move', None)
3097
3098     def post_process(self, filename, ie_info, files_to_move=None):
3099         """Run all the postprocessors on the given file."""
3100         info = dict(ie_info)
3101         info['filepath'] = filename
3102         info['__files_to_move'] = files_to_move or {}
3103
3104         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3105             info = self.run_pp(pp, info)
3106         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3107         del info['__files_to_move']
3108         for pp in self._pps['after_move']:
3109             info = self.run_pp(pp, info)
3110         return info
3111
3112     def _make_archive_id(self, info_dict):
3113         video_id = info_dict.get('id')
3114         if not video_id:
3115             return
3116         # Future-proof against any change in case
3117         # and backwards compatibility with prior versions
3118         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3119         if extractor is None:
3120             url = str_or_none(info_dict.get('url'))
3121             if not url:
3122                 return
3123             # Try to find matching extractor for the URL and take its ie_key
3124             for ie_key, ie in self._ies.items():
3125                 if ie.suitable(url):
3126                     extractor = ie_key
3127                     break
3128             else:
3129                 return
3130         return '%s %s' % (extractor.lower(), video_id)
3131
3132     def in_download_archive(self, info_dict):
3133         fn = self.params.get('download_archive')
3134         if fn is None:
3135             return False
3136
3137         vid_id = self._make_archive_id(info_dict)
3138         if not vid_id:
3139             return False  # Incomplete video information
3140
3141         return vid_id in self.archive
3142
3143     def record_download_archive(self, info_dict):
3144         fn = self.params.get('download_archive')
3145         if fn is None:
3146             return
3147         vid_id = self._make_archive_id(info_dict)
3148         assert vid_id
3149         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3150             archive_file.write(vid_id + '\n')
3151         self.archive.add(vid_id)
3152
3153     @staticmethod
3154     def format_resolution(format, default='unknown'):
3155         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3156             return 'audio only'
3157         if format.get('resolution') is not None:
3158             return format['resolution']
3159         if format.get('width') and format.get('height'):
3160             return '%dx%d' % (format['width'], format['height'])
3161         elif format.get('height'):
3162             return '%sp' % format['height']
3163         elif format.get('width'):
3164             return '%dx?' % format['width']
3165         return default
3166
3167     def _format_note(self, fdict):
3168         res = ''
3169         if fdict.get('ext') in ['f4f', 'f4m']:
3170             res += '(unsupported) '
3171         if fdict.get('language'):
3172             if res:
3173                 res += ' '
3174             res += '[%s] ' % fdict['language']
3175         if fdict.get('format_note') is not None:
3176             res += fdict['format_note'] + ' '
3177         if fdict.get('tbr') is not None:
3178             res += '%4dk ' % fdict['tbr']
3179         if fdict.get('container') is not None:
3180             if res:
3181                 res += ', '
3182             res += '%s container' % fdict['container']
3183         if (fdict.get('vcodec') is not None
3184                 and fdict.get('vcodec') != 'none'):
3185             if res:
3186                 res += ', '
3187             res += fdict['vcodec']
3188             if fdict.get('vbr') is not None:
3189                 res += '@'
3190         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3191             res += 'video@'
3192         if fdict.get('vbr') is not None:
3193             res += '%4dk' % fdict['vbr']
3194         if fdict.get('fps') is not None:
3195             if res:
3196                 res += ', '
3197             res += '%sfps' % fdict['fps']
3198         if fdict.get('acodec') is not None:
3199             if res:
3200                 res += ', '
3201             if fdict['acodec'] == 'none':
3202                 res += 'video only'
3203             else:
3204                 res += '%-5s' % fdict['acodec']
3205         elif fdict.get('abr') is not None:
3206             if res:
3207                 res += ', '
3208             res += 'audio'
3209         if fdict.get('abr') is not None:
3210             res += '@%3dk' % fdict['abr']
3211         if fdict.get('asr') is not None:
3212             res += ' (%5dHz)' % fdict['asr']
3213         if fdict.get('filesize') is not None:
3214             if res:
3215                 res += ', '
3216             res += format_bytes(fdict['filesize'])
3217         elif fdict.get('filesize_approx') is not None:
3218             if res:
3219                 res += ', '
3220             res += '~' + format_bytes(fdict['filesize_approx'])
3221         return res
3222
3223     def _list_format_headers(self, *headers):
3224         if self.params.get('listformats_table', True) is not False:
3225             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3226         return headers
3227
3228     def list_formats(self, info_dict):
3229         formats = info_dict.get('formats', [info_dict])
3230         new_format = self.params.get('listformats_table', True) is not False
3231         if new_format:
3232             delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3233             table = [
3234                 [
3235                     self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3236                     format_field(f, 'ext'),
3237                     format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3238                     format_field(f, 'fps', '\t%d'),
3239                     format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3240                     delim,
3241                     format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3242                     format_field(f, 'tbr', '\t%dk'),
3243                     shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
3244                     delim,
3245                     format_field(f, 'vcodec', default='unknown').replace(
3246                         'none',
3247                         'images' if f.get('acodec') == 'none'
3248                         else self._format_screen('audio only', self.Styles.SUPPRESS)),
3249                     format_field(f, 'vbr', '\t%dk'),
3250                     format_field(f, 'acodec', default='unknown').replace(
3251                         'none',
3252                         '' if f.get('vcodec') == 'none'
3253                         else self._format_screen('video only', self.Styles.SUPPRESS)),
3254                     format_field(f, 'abr', '\t%dk'),
3255                     format_field(f, 'asr', '\t%dHz'),
3256                     join_nonempty(
3257                         self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3258                         format_field(f, 'language', '[%s]'),
3259                         join_nonempty(
3260                             format_field(f, 'format_note'),
3261                             format_field(f, 'container', ignore=(None, f.get('ext'))),
3262                             delim=', '),
3263                         delim=' '),
3264                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3265             header_line = self._list_format_headers(
3266                 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3267                 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3268         else:
3269             table = [
3270                 [
3271                     format_field(f, 'format_id'),
3272                     format_field(f, 'ext'),
3273                     self.format_resolution(f),
3274                     self._format_note(f)]
3275                 for f in formats
3276                 if f.get('preference') is None or f['preference'] >= -1000]
3277             header_line = ['format code', 'extension', 'resolution', 'note']
3278
3279         self.to_screen(
3280             '[info] Available formats for %s:' % info_dict['id'])
3281         self.to_stdout(render_table(
3282             header_line, table,
3283             extra_gap=(0 if new_format else 1),
3284             hide_empty=new_format,
3285             delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3286
3287     def list_thumbnails(self, info_dict):
3288         thumbnails = list(info_dict.get('thumbnails'))
3289         if not thumbnails:
3290             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3291             return
3292
3293         self.to_screen(
3294             '[info] Thumbnails for %s:' % info_dict['id'])
3295         self.to_stdout(render_table(
3296             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3297             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3298
3299     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3300         if not subtitles:
3301             self.to_screen('%s has no %s' % (video_id, name))
3302             return
3303         self.to_screen(
3304             'Available %s for %s:' % (name, video_id))
3305
3306         def _row(lang, formats):
3307             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3308             if len(set(names)) == 1:
3309                 names = [] if names[0] == 'unknown' else names[:1]
3310             return [lang, ', '.join(names), ', '.join(exts)]
3311
3312         self.to_stdout(render_table(
3313             self._list_format_headers('Language', 'Name', 'Formats'),
3314             [_row(lang, formats) for lang, formats in subtitles.items()],
3315             hide_empty=True))
3316
3317     def urlopen(self, req):
3318         """ Start an HTTP download """
3319         if isinstance(req, compat_basestring):
3320             req = sanitized_Request(req)
3321         return self._opener.open(req, timeout=self._socket_timeout)
3322
3323     def print_debug_header(self):
3324         if not self.params.get('verbose'):
3325             return
3326
3327         def get_encoding(stream):
3328             ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3329             if not supports_terminal_sequences(stream):
3330                 ret += ' (No ANSI)'
3331             return ret
3332
3333         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3334             locale.getpreferredencoding(),
3335             sys.getfilesystemencoding(),
3336             get_encoding(self._screen_file), get_encoding(self._err_file),
3337             self.get_encoding())
3338
3339         logger = self.params.get('logger')
3340         if logger:
3341             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3342             write_debug(encoding_str)
3343         else:
3344             write_string(f'[debug] {encoding_str}\n', encoding=None)
3345             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3346
3347         source = detect_variant()
3348         write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
3349         if not _LAZY_LOADER:
3350             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3351                 write_debug('Lazy loading extractors is forcibly disabled')
3352             else:
3353                 write_debug('Lazy loading extractors is disabled')
3354         if plugin_extractors or plugin_postprocessors:
3355             write_debug('Plugins: %s' % [
3356                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3357                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3358         if self.params.get('compat_opts'):
3359             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3360         try:
3361             sp = Popen(
3362                 ['git', 'rev-parse', '--short', 'HEAD'],
3363                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3364                 cwd=os.path.dirname(os.path.abspath(__file__)))
3365             out, err = sp.communicate_or_kill()
3366             out = out.decode().strip()
3367             if re.match('[0-9a-f]+', out):
3368                 write_debug('Git HEAD: %s' % out)
3369         except Exception:
3370             try:
3371                 sys.exc_clear()
3372             except Exception:
3373                 pass
3374
3375         def python_implementation():
3376             impl_name = platform.python_implementation()
3377             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3378                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3379             return impl_name
3380
3381         write_debug('Python version %s (%s %s) - %s' % (
3382             platform.python_version(),
3383             python_implementation(),
3384             platform.architecture()[0],
3385             platform_name()))
3386
3387         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3388         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3389         if ffmpeg_features:
3390             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3391
3392         exe_versions['rtmpdump'] = rtmpdump_version()
3393         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3394         exe_str = ', '.join(
3395             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3396         ) or 'none'
3397         write_debug('exe versions: %s' % exe_str)
3398
3399         from .downloader.websocket import has_websockets
3400         from .postprocessor.embedthumbnail import has_mutagen
3401         from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3402
3403         lib_str = join_nonempty(
3404             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3405             KEYRING_AVAILABLE and 'keyring',
3406             has_mutagen and 'mutagen',
3407             SQLITE_AVAILABLE and 'sqlite',
3408             has_websockets and 'websockets',
3409             delim=', ') or 'none'
3410         write_debug('Optional libraries: %s' % lib_str)
3411
3412         proxy_map = {}
3413         for handler in self._opener.handlers:
3414             if hasattr(handler, 'proxies'):
3415                 proxy_map.update(handler.proxies)
3416         write_debug(f'Proxy map: {proxy_map}')
3417
3418         # Not implemented
3419         if False and self.params.get('call_home'):
3420             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3421             write_debug('Public IP address: %s' % ipaddr)
3422             latest_version = self.urlopen(
3423                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3424             if version_tuple(latest_version) > version_tuple(__version__):
3425                 self.report_warning(
3426                     'You are using an outdated version (newest version: %s)! '
3427                     'See https://yt-dl.org/update if you need help updating.' %
3428                     latest_version)
3429
3430     def _setup_opener(self):
3431         timeout_val = self.params.get('socket_timeout')
3432         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3433
3434         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3435         opts_cookiefile = self.params.get('cookiefile')
3436         opts_proxy = self.params.get('proxy')
3437
3438         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3439
3440         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3441         if opts_proxy is not None:
3442             if opts_proxy == '':
3443                 proxies = {}
3444             else:
3445                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3446         else:
3447             proxies = compat_urllib_request.getproxies()
3448             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3449             if 'http' in proxies and 'https' not in proxies:
3450                 proxies['https'] = proxies['http']
3451         proxy_handler = PerRequestProxyHandler(proxies)
3452
3453         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3454         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3455         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3456         redirect_handler = YoutubeDLRedirectHandler()
3457         data_handler = compat_urllib_request_DataHandler()
3458
3459         # When passing our own FileHandler instance, build_opener won't add the
3460         # default FileHandler and allows us to disable the file protocol, which
3461         # can be used for malicious purposes (see
3462         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3463         file_handler = compat_urllib_request.FileHandler()
3464
3465         def file_open(*args, **kwargs):
3466             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3467         file_handler.file_open = file_open
3468
3469         opener = compat_urllib_request.build_opener(
3470             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3471
3472         # Delete the default user-agent header, which would otherwise apply in
3473         # cases where our custom HTTP handler doesn't come into play
3474         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3475         opener.addheaders = []
3476         self._opener = opener
3477
3478     def encode(self, s):
3479         if isinstance(s, bytes):
3480             return s  # Already encoded
3481
3482         try:
3483             return s.encode(self.get_encoding())
3484         except UnicodeEncodeError as err:
3485             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3486             raise
3487
3488     def get_encoding(self):
3489         encoding = self.params.get('encoding')
3490         if encoding is None:
3491             encoding = preferredencoding()
3492         return encoding
3493
3494     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3495         ''' Write infojson and returns True = written, False = skip, None = error '''
3496         if overwrite is None:
3497             overwrite = self.params.get('overwrites', True)
3498         if not self.params.get('writeinfojson'):
3499             return False
3500         elif not infofn:
3501             self.write_debug(f'Skipping writing {label} infojson')
3502             return False
3503         elif not self._ensure_dir_exists(infofn):
3504             return None
3505         elif not overwrite and os.path.exists(infofn):
3506             self.to_screen(f'[info] {label.title()} metadata is already present')
3507         else:
3508             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3509             try:
3510                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3511             except (OSError, IOError):
3512                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3513                 return None
3514         return True
3515
3516     def _write_description(self, label, ie_result, descfn):
3517         ''' Write description and returns True = written, False = skip, None = error '''
3518         if not self.params.get('writedescription'):
3519             return False
3520         elif not descfn:
3521             self.write_debug(f'Skipping writing {label} description')
3522             return False
3523         elif not self._ensure_dir_exists(descfn):
3524             return None
3525         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3526             self.to_screen(f'[info] {label.title()} description is already present')
3527         elif ie_result.get('description') is None:
3528             self.report_warning(f'There\'s no {label} description to write')
3529             return False
3530         else:
3531             try:
3532                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3533                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3534                     descfile.write(ie_result['description'])
3535             except (OSError, IOError):
3536                 self.report_error(f'Cannot write {label} description file {descfn}')
3537                 return None
3538         return True
3539
3540     def _write_subtitles(self, info_dict, filename):
3541         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3542         ret = []
3543         subtitles = info_dict.get('requested_subtitles')
3544         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3545             # subtitles download errors are already managed as troubles in relevant IE
3546             # that way it will silently go on when used with unsupporting IE
3547             return ret
3548
3549         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3550         if not sub_filename_base:
3551             self.to_screen('[info] Skipping writing video subtitles')
3552             return ret
3553         for sub_lang, sub_info in subtitles.items():
3554             sub_format = sub_info['ext']
3555             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3556             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3557             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3558                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3559                 sub_info['filepath'] = sub_filename
3560                 ret.append((sub_filename, sub_filename_final))
3561                 continue
3562
3563             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3564             if sub_info.get('data') is not None:
3565                 try:
3566                     # Use newline='' to prevent conversion of newline characters
3567                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3568                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3569                         subfile.write(sub_info['data'])
3570                     sub_info['filepath'] = sub_filename
3571                     ret.append((sub_filename, sub_filename_final))
3572                     continue
3573                 except (OSError, IOError):
3574                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3575                     return None
3576
3577             try:
3578                 sub_copy = sub_info.copy()
3579                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3580                 self.dl(sub_filename, sub_copy, subtitle=True)
3581                 sub_info['filepath'] = sub_filename
3582                 ret.append((sub_filename, sub_filename_final))
3583             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3584                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3585                 continue
3586         return ret
3587
3588     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3589         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3590         write_all = self.params.get('write_all_thumbnails', False)
3591         thumbnails, ret = [], []
3592         if write_all or self.params.get('writethumbnail', False):
3593             thumbnails = info_dict.get('thumbnails') or []
3594         multiple = write_all and len(thumbnails) > 1
3595
3596         if thumb_filename_base is None:
3597             thumb_filename_base = filename
3598         if thumbnails and not thumb_filename_base:
3599             self.write_debug(f'Skipping writing {label} thumbnail')
3600             return ret
3601
3602         for t in thumbnails[::-1]:
3603             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3604             thumb_display_id = f'{label} thumbnail {t["id"]}'
3605             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3606             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3607
3608             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3609                 ret.append((thumb_filename, thumb_filename_final))
3610                 t['filepath'] = thumb_filename
3611                 self.to_screen('[info] %s is already present' % (
3612                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3613             else:
3614                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3615                 try:
3616                     uf = self.urlopen(t['url'])
3617                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3618                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3619                         shutil.copyfileobj(uf, thumbf)
3620                     ret.append((thumb_filename, thumb_filename_final))
3621                     t['filepath'] = thumb_filename
3622                 except network_exceptions as err:
3623                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3624             if ret and not write_all:
3625                 break
3626         return ret