]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
Allow using a custom format selector through API
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import functools
13 import io
14 import itertools
15 import json
16 import locale
17 import operator
18 import os
19 import platform
20 import re
21 import shutil
22 import subprocess
23 import sys
24 import tempfile
25 import time
26 import tokenize
27 import traceback
28 import random
29 import unicodedata
30
31 from enum import Enum
32 from string import ascii_letters
33
34 from .compat import (
35 compat_basestring,
36 compat_get_terminal_size,
37 compat_kwargs,
38 compat_numeric_types,
39 compat_os_name,
40 compat_pycrypto_AES,
41 compat_shlex_quote,
42 compat_str,
43 compat_tokenize_tokenize,
44 compat_urllib_error,
45 compat_urllib_request,
46 compat_urllib_request_DataHandler,
47 windows_enable_vt_mode,
48 )
49 from .cookies import load_cookies
50 from .utils import (
51 age_restricted,
52 args_to_str,
53 ContentTooShortError,
54 date_from_str,
55 DateRange,
56 DEFAULT_OUTTMPL,
57 determine_ext,
58 determine_protocol,
59 DownloadCancelled,
60 DownloadError,
61 encode_compat_str,
62 encodeFilename,
63 EntryNotInPlaylist,
64 error_to_compat_str,
65 ExistingVideoReached,
66 expand_path,
67 ExtractorError,
68 float_or_none,
69 format_bytes,
70 format_field,
71 formatSeconds,
72 GeoRestrictedError,
73 HEADRequest,
74 int_or_none,
75 iri_to_uri,
76 ISO3166Utils,
77 join_nonempty,
78 LazyList,
79 LINK_TEMPLATES,
80 locked_file,
81 make_dir,
82 make_HTTPS_handler,
83 MaxDownloadsReached,
84 network_exceptions,
85 number_of_digits,
86 orderedSet,
87 OUTTMPL_TYPES,
88 PagedList,
89 parse_filesize,
90 PerRequestProxyHandler,
91 platform_name,
92 Popen,
93 PostProcessingError,
94 preferredencoding,
95 prepend_extension,
96 register_socks_protocols,
97 RejectedVideoReached,
98 render_table,
99 replace_extension,
100 SameFileError,
101 sanitize_filename,
102 sanitize_path,
103 sanitize_url,
104 sanitized_Request,
105 std_headers,
106 STR_FORMAT_RE_TMPL,
107 STR_FORMAT_TYPES,
108 str_or_none,
109 strftime_or_none,
110 subtitles_filename,
111 supports_terminal_sequences,
112 ThrottledDownload,
113 to_high_limit_path,
114 traverse_obj,
115 try_get,
116 UnavailableVideoError,
117 url_basename,
118 variadic,
119 version_tuple,
120 write_json_file,
121 write_string,
122 YoutubeDLCookieProcessor,
123 YoutubeDLHandler,
124 YoutubeDLRedirectHandler,
125 )
126 from .cache import Cache
127 from .minicurses import format_text
128 from .extractor import (
129 gen_extractor_classes,
130 get_info_extractor,
131 _LAZY_LOADER,
132 _PLUGIN_CLASSES as plugin_extractors
133 )
134 from .extractor.openload import PhantomJSwrapper
135 from .downloader import (
136 FFmpegFD,
137 get_suitable_downloader,
138 shorten_protocol_name
139 )
140 from .downloader.rtmp import rtmpdump_version
141 from .postprocessor import (
142 get_postprocessor,
143 EmbedThumbnailPP,
144 FFmpegFixupDurationPP,
145 FFmpegFixupM3u8PP,
146 FFmpegFixupM4aPP,
147 FFmpegFixupStretchedPP,
148 FFmpegFixupTimestampPP,
149 FFmpegMergerPP,
150 FFmpegPostProcessor,
151 MoveFilesAfterDownloadPP,
152 _PLUGIN_CLASSES as plugin_postprocessors
153 )
154 from .update import detect_variant
155 from .version import __version__
156
157 if compat_os_name == 'nt':
158 import ctypes
159
160
161 class YoutubeDL(object):
162 """YoutubeDL class.
163
164 YoutubeDL objects are the ones responsible for downloading the
165 actual video file and writing it to disk if the user has requested
166 it, among some other tasks. In most cases there should be one per
167 program. As, given a video URL, the downloader doesn't know how to
168 extract all the needed information, task that InfoExtractors do, it
169 has to pass the URL to one of them.
170
171 For this, YoutubeDL objects have a method that allows
172 InfoExtractors to be registered in a given order. When it is passed
173 a URL, the YoutubeDL object handles it to the first InfoExtractor it
174 finds that reports being able to handle it. The InfoExtractor extracts
175 all the information about the video or videos the URL refers to, and
176 YoutubeDL process the extracted information, possibly using a File
177 Downloader to download the video.
178
179 YoutubeDL objects accept a lot of parameters. In order not to saturate
180 the object constructor with arguments, it receives a dictionary of
181 options instead. These options are available through the params
182 attribute for the InfoExtractors to use. The YoutubeDL also
183 registers itself as the downloader in charge for the InfoExtractors
184 that are added to it, so this is a "mutual registration".
185
186 Available options:
187
188 username: Username for authentication purposes.
189 password: Password for authentication purposes.
190 videopassword: Password for accessing a video.
191 ap_mso: Adobe Pass multiple-system operator identifier.
192 ap_username: Multiple-system operator account username.
193 ap_password: Multiple-system operator account password.
194 usenetrc: Use netrc for authentication instead.
195 verbose: Print additional info to stdout.
196 quiet: Do not print messages to stdout.
197 no_warnings: Do not print out anything for warnings.
198 forceprint: A list of templates to force print
199 forceurl: Force printing final URL. (Deprecated)
200 forcetitle: Force printing title. (Deprecated)
201 forceid: Force printing ID. (Deprecated)
202 forcethumbnail: Force printing thumbnail URL. (Deprecated)
203 forcedescription: Force printing description. (Deprecated)
204 forcefilename: Force printing final filename. (Deprecated)
205 forceduration: Force printing duration. (Deprecated)
206 forcejson: Force printing info_dict as JSON.
207 dump_single_json: Force printing the info_dict of the whole playlist
208 (or video) as a single JSON line.
209 force_write_download_archive: Force writing download archive regardless
210 of 'skip_download' or 'simulate'.
211 simulate: Do not download the video files. If unset (or None),
212 simulate only if listsubtitles, listformats or list_thumbnails is used
213 format: Video format code. see "FORMAT SELECTION" for more details.
214 You can also pass a function. The function takes 'ctx' as
215 argument and returns the formats to download.
216 See "build_format_selector" for an implementation
217 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
218 ignore_no_formats_error: Ignore "No video formats" error. Useful for
219 extracting metadata even if the video is not actually
220 available for download (experimental)
221 format_sort: A list of fields by which to sort the video formats.
222 See "Sorting Formats" for more details.
223 format_sort_force: Force the given format_sort. see "Sorting Formats"
224 for more details.
225 allow_multiple_video_streams: Allow multiple video streams to be merged
226 into a single file
227 allow_multiple_audio_streams: Allow multiple audio streams to be merged
228 into a single file
229 check_formats Whether to test if the formats are downloadable.
230 Can be True (check all), False (check none),
231 'selected' (check selected formats),
232 or None (check only if requested by extractor)
233 paths: Dictionary of output paths. The allowed keys are 'home'
234 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
235 outtmpl: Dictionary of templates for output names. Allowed keys
236 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
237 For compatibility with youtube-dl, a single string can also be used
238 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
239 restrictfilenames: Do not allow "&" and spaces in file names
240 trim_file_name: Limit length of filename (extension excluded)
241 windowsfilenames: Force the filenames to be windows compatible
242 ignoreerrors: Do not stop on download/postprocessing errors.
243 Can be 'only_download' to ignore only download errors.
244 Default is 'only_download' for CLI, but False for API
245 skip_playlist_after_errors: Number of allowed failures until the rest of
246 the playlist is skipped
247 force_generic_extractor: Force downloader to use the generic extractor
248 overwrites: Overwrite all video and metadata files if True,
249 overwrite only non-video files if None
250 and don't overwrite any file if False
251 For compatibility with youtube-dl,
252 "nooverwrites" may also be used instead
253 playliststart: Playlist item to start at.
254 playlistend: Playlist item to end at.
255 playlist_items: Specific indices of playlist to download.
256 playlistreverse: Download playlist items in reverse order.
257 playlistrandom: Download playlist items in random order.
258 matchtitle: Download only matching titles.
259 rejecttitle: Reject downloads for matching titles.
260 logger: Log messages to a logging.Logger instance.
261 logtostderr: Log messages to stderr instead of stdout.
262 consoletitle: Display progress in console window's titlebar.
263 writedescription: Write the video description to a .description file
264 writeinfojson: Write the video description to a .info.json file
265 clean_infojson: Remove private fields from the infojson
266 getcomments: Extract video comments. This will not be written to disk
267 unless writeinfojson is also given
268 writeannotations: Write the video annotations to a .annotations.xml file
269 writethumbnail: Write the thumbnail image to a file
270 allow_playlist_files: Whether to write playlists' description, infojson etc
271 also to disk when using the 'write*' options
272 write_all_thumbnails: Write all thumbnail formats to files
273 writelink: Write an internet shortcut file, depending on the
274 current platform (.url/.webloc/.desktop)
275 writeurllink: Write a Windows internet shortcut file (.url)
276 writewebloclink: Write a macOS internet shortcut file (.webloc)
277 writedesktoplink: Write a Linux internet shortcut file (.desktop)
278 writesubtitles: Write the video subtitles to a file
279 writeautomaticsub: Write the automatically generated subtitles to a file
280 allsubtitles: Deprecated - Use subtitleslangs = ['all']
281 Downloads all the subtitles of the video
282 (requires writesubtitles or writeautomaticsub)
283 listsubtitles: Lists all available subtitles for the video
284 subtitlesformat: The format code for subtitles
285 subtitleslangs: List of languages of the subtitles to download (can be regex).
286 The list may contain "all" to refer to all the available
287 subtitles. The language can be prefixed with a "-" to
288 exclude it from the requested languages. Eg: ['all', '-live_chat']
289 keepvideo: Keep the video file after post-processing
290 daterange: A DateRange object, download only if the upload_date is in the range.
291 skip_download: Skip the actual download of the video file
292 cachedir: Location of the cache files in the filesystem.
293 False to disable filesystem cache.
294 noplaylist: Download single video instead of a playlist if in doubt.
295 age_limit: An integer representing the user's age in years.
296 Unsuitable videos for the given age are skipped.
297 min_views: An integer representing the minimum view count the video
298 must have in order to not be skipped.
299 Videos without view count information are always
300 downloaded. None for no limit.
301 max_views: An integer representing the maximum view count.
302 Videos that are more popular than that are not
303 downloaded.
304 Videos without view count information are always
305 downloaded. None for no limit.
306 download_archive: File name of a file where all downloads are recorded.
307 Videos already present in the file are not downloaded
308 again.
309 break_on_existing: Stop the download process after attempting to download a
310 file that is in the archive.
311 break_on_reject: Stop the download process when encountering a video that
312 has been filtered out.
313 cookiefile: File name where cookies should be read from and dumped to
314 cookiesfrombrowser: A tuple containing the name of the browser and the profile
315 name/path from where cookies are loaded.
316 Eg: ('chrome', ) or ('vivaldi', 'default')
317 nocheckcertificate:Do not verify SSL certificates
318 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
319 At the moment, this is only supported by YouTube.
320 proxy: URL of the proxy server to use
321 geo_verification_proxy: URL of the proxy to use for IP address verification
322 on geo-restricted sites.
323 socket_timeout: Time to wait for unresponsive hosts, in seconds
324 bidi_workaround: Work around buggy terminals without bidirectional text
325 support, using fribidi
326 debug_printtraffic:Print out sent and received HTTP traffic
327 include_ads: Download ads as well
328 default_search: Prepend this string if an input url is not valid.
329 'auto' for elaborate guessing
330 encoding: Use this encoding instead of the system-specified.
331 extract_flat: Do not resolve URLs, return the immediate result.
332 Pass in 'in_playlist' to only show this behavior for
333 playlist items.
334 postprocessors: A list of dictionaries, each with an entry
335 * key: The name of the postprocessor. See
336 yt_dlp/postprocessor/__init__.py for a list.
337 * when: When to run the postprocessor. Can be one of
338 pre_process|before_dl|post_process|after_move.
339 Assumed to be 'post_process' if not given
340 post_hooks: Deprecated - Register a custom postprocessor instead
341 A list of functions that get called as the final step
342 for each video file, after all postprocessors have been
343 called. The filename will be passed as the only argument.
344 progress_hooks: A list of functions that get called on download
345 progress, with a dictionary with the entries
346 * status: One of "downloading", "error", or "finished".
347 Check this first and ignore unknown values.
348 * info_dict: The extracted info_dict
349
350 If status is one of "downloading", or "finished", the
351 following properties may also be present:
352 * filename: The final filename (always present)
353 * tmpfilename: The filename we're currently writing to
354 * downloaded_bytes: Bytes on disk
355 * total_bytes: Size of the whole file, None if unknown
356 * total_bytes_estimate: Guess of the eventual file size,
357 None if unavailable.
358 * elapsed: The number of seconds since download started.
359 * eta: The estimated time in seconds, None if unknown
360 * speed: The download speed in bytes/second, None if
361 unknown
362 * fragment_index: The counter of the currently
363 downloaded video fragment.
364 * fragment_count: The number of fragments (= individual
365 files that will be merged)
366
367 Progress hooks are guaranteed to be called at least once
368 (with status "finished") if the download is successful.
369 postprocessor_hooks: A list of functions that get called on postprocessing
370 progress, with a dictionary with the entries
371 * status: One of "started", "processing", or "finished".
372 Check this first and ignore unknown values.
373 * postprocessor: Name of the postprocessor
374 * info_dict: The extracted info_dict
375
376 Progress hooks are guaranteed to be called at least twice
377 (with status "started" and "finished") if the processing is successful.
378 merge_output_format: Extension to use when merging formats.
379 final_ext: Expected final extension; used to detect when the file was
380 already downloaded and converted
381 fixup: Automatically correct known faults of the file.
382 One of:
383 - "never": do nothing
384 - "warn": only emit a warning
385 - "detect_or_warn": check whether we can do anything
386 about it, warn otherwise (default)
387 source_address: Client-side IP address to bind to.
388 call_home: Boolean, true iff we are allowed to contact the
389 yt-dlp servers for debugging. (BROKEN)
390 sleep_interval_requests: Number of seconds to sleep between requests
391 during extraction
392 sleep_interval: Number of seconds to sleep before each download when
393 used alone or a lower bound of a range for randomized
394 sleep before each download (minimum possible number
395 of seconds to sleep) when used along with
396 max_sleep_interval.
397 max_sleep_interval:Upper bound of a range for randomized sleep before each
398 download (maximum possible number of seconds to sleep).
399 Must only be used along with sleep_interval.
400 Actual sleep time will be a random float from range
401 [sleep_interval; max_sleep_interval].
402 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
403 listformats: Print an overview of available video formats and exit.
404 list_thumbnails: Print a table of all thumbnails and exit.
405 match_filter: A function that gets called with the info_dict of
406 every video.
407 If it returns a message, the video is ignored.
408 If it returns None, the video is downloaded.
409 match_filter_func in utils.py is one example for this.
410 no_color: Do not emit color codes in output.
411 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
412 HTTP header
413 geo_bypass_country:
414 Two-letter ISO 3166-2 country code that will be used for
415 explicit geographic restriction bypassing via faking
416 X-Forwarded-For HTTP header
417 geo_bypass_ip_block:
418 IP range in CIDR notation that will be used similarly to
419 geo_bypass_country
420
421 The following options determine which downloader is picked:
422 external_downloader: A dictionary of protocol keys and the executable of the
423 external downloader to use for it. The allowed protocols
424 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
425 Set the value to 'native' to use the native downloader
426 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
427 or {'m3u8': 'ffmpeg'} instead.
428 Use the native HLS downloader instead of ffmpeg/avconv
429 if True, otherwise use ffmpeg/avconv if False, otherwise
430 use downloader suggested by extractor if None.
431 compat_opts: Compatibility options. See "Differences in default behavior".
432 The following options do not work when used through the API:
433 filename, abort-on-error, multistreams, no-live-chat, format-sort
434 no-clean-infojson, no-playlist-metafiles, no-keep-subs.
435 Refer __init__.py for their implementation
436 progress_template: Dictionary of templates for progress outputs.
437 Allowed keys are 'download', 'postprocess',
438 'download-title' (console title) and 'postprocess-title'.
439 The template is mapped on a dictionary with keys 'progress' and 'info'
440
441 The following parameters are not used by YoutubeDL itself, they are used by
442 the downloader (see yt_dlp/downloader/common.py):
443 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
444 max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
445 noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
446 external_downloader_args, concurrent_fragment_downloads.
447
448 The following options are used by the post processors:
449 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
450 otherwise prefer ffmpeg. (avconv support is deprecated)
451 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
452 to the binary or its containing directory.
453 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
454 and a list of additional command-line arguments for the
455 postprocessor/executable. The dict can also have "PP+EXE" keys
456 which are used when the given exe is used by the given PP.
457 Use 'default' as the name for arguments to be passed to all PP
458 For compatibility with youtube-dl, a single list of args
459 can also be used
460
461 The following options are used by the extractors:
462 extractor_retries: Number of times to retry for known errors
463 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
464 hls_split_discontinuity: Split HLS playlists to different formats at
465 discontinuities such as ad breaks (default: False)
466 extractor_args: A dictionary of arguments to be passed to the extractors.
467 See "EXTRACTOR ARGUMENTS" for details.
468 Eg: {'youtube': {'skip': ['dash', 'hls']}}
469 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
470 If True (default), DASH manifests and related
471 data will be downloaded and processed by extractor.
472 You can reduce network I/O by disabling it if you don't
473 care about DASH. (only for youtube)
474 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
475 If True (default), HLS manifests and related
476 data will be downloaded and processed by extractor.
477 You can reduce network I/O by disabling it if you don't
478 care about HLS. (only for youtube)
479 """
480
    # info_dict fields whose values are numeric (used when formatting/comparing
    # metadata fields)
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    ))

    # File extensions grouped by media kind
    # NOTE(review): presumably consulted by the format selector — confirm at use sites
    _format_selection_exts = {
        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
        'video': {'mp4', 'flv', 'webm', '3gp'},
        'storyboards': {'mhtml'},
    }

    # Class-level defaults only; the real per-instance values are assigned in
    # __init__ (e.g. _ies, _pps and _printed_messages are re-created there so
    # instances do not share these mutable class attributes)
    params = None
    _ies = {}
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    _printed_messages = set()
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
507
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        @param params       Dictionary of options (see the class docstring).
                            None is treated as an empty dictionary.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        # Per-instance state; re-created here so the mutable class-level
        # defaults are never shared between instances
        self._ies = {}
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # 'logtostderr' picks stderr as the "screen"; note the bool indexes the list
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        windows_enable_vt_mode()
        # FIXME: This will break if we ever print color to stdout
        self._allow_colors = {
            'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
            'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
        }

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                '         If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        def check_deprecated(param, option, suggestion):
            # Warn (once) if a deprecated option is set; returns whether it was set
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)

        if 'list-formats' in self.params.get('compat_opts', []):
            self.params['listformats_table'] = False

        # Keep 'overwrites' and the legacy 'nooverwrites' key in sync
        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    # Prefer 'bidiv'; fall back to 'fribidi' if it is not installed
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            None if self.params.get('format') is None
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register postprocessors declared via params
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            pp_class = get_postprocessor(pp_def.pop('key'))
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                # A missing archive file is fine; anything else is re-raised
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))
661
662 def warn_if_short_id(self, argv):
663 # short YouTube ID starting with dash?
664 idxs = [
665 i for i, a in enumerate(argv)
666 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
667 if idxs:
668 correct_argv = (
669 ['yt-dlp']
670 + [a for i, a in enumerate(argv) if i not in idxs]
671 + ['--'] + [argv[i] for i in idxs]
672 )
673 self.report_warning(
674 'Long argument string detected. '
675 'Use -- to separate parameters and URLs, like this:\n%s' %
676 args_to_str(correct_argv))
677
678 def add_info_extractor(self, ie):
679 """Add an InfoExtractor object to the end of the list."""
680 ie_key = ie.ie_key()
681 self._ies[ie_key] = ie
682 if not isinstance(ie, type):
683 self._ies_instances[ie_key] = ie
684 ie.set_downloader(self)
685
686 def _get_info_extractor_class(self, ie_key):
687 ie = self._ies.get(ie_key)
688 if ie is None:
689 ie = get_info_extractor(ie_key)
690 self.add_info_extractor(ie)
691 return ie
692
693 def get_info_extractor(self, ie_key):
694 """
695 Get an instance of an IE with name ie_key, it will try to get one from
696 the _ies list, if there's no instance it will create a new one and add
697 it to the extractor list.
698 """
699 ie = self._ies_instances.get(ie_key)
700 if ie is None:
701 ie = get_info_extractor(ie_key)()
702 self.add_info_extractor(ie)
703 return ie
704
705 def add_default_info_extractors(self):
706 """
707 Add the InfoExtractors returned by gen_extractors to the end of the list
708 """
709 for ie in gen_extractor_classes():
710 self.add_info_extractor(ie)
711
712 def add_post_processor(self, pp, when='post_process'):
713 """Add a PostProcessor object to the end of the chain."""
714 self._pps[when].append(pp)
715 pp.set_downloader(self)
716
717 def add_post_hook(self, ph):
718 """Add the post hook"""
719 self._post_hooks.append(ph)
720
721 def add_progress_hook(self, ph):
722 """Add the download progress hook"""
723 self._progress_hooks.append(ph)
724
725 def add_postprocessor_hook(self, ph):
726 """Add the postprocessing progress hook"""
727 self._postprocessor_hooks.append(ph)
728
729 def _bidi_workaround(self, message):
730 if not hasattr(self, '_output_channel'):
731 return message
732
733 assert hasattr(self, '_output_process')
734 assert isinstance(message, compat_str)
735 line_count = message.count('\n') + 1
736 self._output_process.stdin.write((message + '\n').encode('utf-8'))
737 self._output_process.stdin.flush()
738 res = ''.join(self._output_channel.readline().decode('utf-8')
739 for _ in range(line_count))
740 return res[:-len('\n')]
741
742 def _write_string(self, message, out=None, only_once=False):
743 if only_once:
744 if message in self._printed_messages:
745 return
746 self._printed_messages.add(message)
747 write_string(message, out=out, encoding=self.params.get('encoding'))
748
749 def to_stdout(self, message, skip_eol=False, quiet=False):
750 """Print message to stdout"""
751 if self.params.get('logger'):
752 self.params['logger'].debug(message)
753 elif not quiet or self.params.get('verbose'):
754 self._write_string(
755 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
756 self._err_file if quiet else self._screen_file)
757
758 def to_stderr(self, message, only_once=False):
759 """Print message to stderr"""
760 assert isinstance(message, compat_str)
761 if self.params.get('logger'):
762 self.params['logger'].error(message)
763 else:
764 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
765
766 def to_console_title(self, message):
767 if not self.params.get('consoletitle', False):
768 return
769 if compat_os_name == 'nt':
770 if ctypes.windll.kernel32.GetConsoleWindow():
771 # c_wchar_p() might not be necessary if `message` is
772 # already of type unicode()
773 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
774 elif 'TERM' in os.environ:
775 self._write_string('\033]0;%s\007' % message, self._screen_file)
776
777 def save_console_title(self):
778 if not self.params.get('consoletitle', False):
779 return
780 if self.params.get('simulate'):
781 return
782 if compat_os_name != 'nt' and 'TERM' in os.environ:
783 # Save the title on stack
784 self._write_string('\033[22;0t', self._screen_file)
785
786 def restore_console_title(self):
787 if not self.params.get('consoletitle', False):
788 return
789 if self.params.get('simulate'):
790 return
791 if compat_os_name != 'nt' and 'TERM' in os.environ:
792 # Restore the title from stack
793 self._write_string('\033[23;0t', self._screen_file)
794
795 def __enter__(self):
796 self.save_console_title()
797 return self
798
799 def __exit__(self, *args):
800 self.restore_console_title()
801
802 if self.params.get('cookiefile') is not None:
803 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
804
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        # the active exception wraps an inner exc_info
                        # (e.g. a DownloadError); show the inner traceback first
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # not inside an except block: show the current call stack instead
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not self.params.get('ignoreerrors'):
            # prefer the wrapped exception's exc_info when available so the
            # original cause is preserved in the raised DownloadError
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # errors are being ignored: record the failure in the process exit code
        self._download_retcode = 1
835
836 def to_screen(self, message, skip_eol=False):
837 """Print message to stdout if not in quiet mode"""
838 self.to_stdout(
839 message, skip_eol, quiet=self.params.get('quiet', False))
840
    class Styles(Enum):
        """Semantic terminal-color roles; values are color names consumed by __format_text."""
        HEADERS = 'yellow'
        EMPHASIS = 'blue'
        ID = 'green'
        DELIM = 'blue'
        ERROR = 'red'
        WARNING = 'yellow'
848
849 def __format_text(self, out, text, f, fallback=None, *, test_encoding=False):
850 assert out in ('screen', 'err')
851 if test_encoding:
852 original_text = text
853 handle = self._screen_file if out == 'screen' else self._err_file
854 encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
855 text = text.encode(encoding, 'ignore').decode(encoding)
856 if fallback is not None and text != original_text:
857 text = fallback
858 if isinstance(f, self.Styles):
859 f = f._value_
860 return format_text(text, f) if self._allow_colors[out] else text if fallback is None else fallback
861
862 def _format_screen(self, *args, **kwargs):
863 return self.__format_text('screen', *args, **kwargs)
864
865 def _format_err(self, *args, **kwargs):
866 return self.__format_text('err', *args, **kwargs)
867
868 def report_warning(self, message, only_once=False):
869 '''
870 Print the message to stderr, it will be prefixed with 'WARNING:'
871 If stderr is a tty file the 'WARNING:' will be colored
872 '''
873 if self.params.get('logger') is not None:
874 self.params['logger'].warning(message)
875 else:
876 if self.params.get('no_warnings'):
877 return
878 self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
879
880 def report_error(self, message, tb=None):
881 '''
882 Do the same as trouble, but prefixes the message with 'ERROR:', colored
883 in red if stderr is a tty file.
884 '''
885 self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb)
886
887 def write_debug(self, message, only_once=False):
888 '''Log debug message or Print message to stderr'''
889 if not self.params.get('verbose', False):
890 return
891 message = '[debug] %s' % message
892 if self.params.get('logger'):
893 self.params['logger'].debug(message)
894 else:
895 self.to_stderr(message, only_once)
896
897 def report_file_already_downloaded(self, file_name):
898 """Report file has already been fully downloaded."""
899 try:
900 self.to_screen('[download] %s has already been downloaded' % file_name)
901 except UnicodeEncodeError:
902 self.to_screen('[download] The file has already been downloaded')
903
904 def report_file_delete(self, file_name):
905 """Report that existing file will be deleted."""
906 try:
907 self.to_screen('Deleting existing file %s' % file_name)
908 except UnicodeEncodeError:
909 self.to_screen('Deleting existing file')
910
911 def raise_no_formats(self, info, forced=False):
912 has_drm = info.get('__has_drm')
913 msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
914 expected = self.params.get('ignore_no_formats_error')
915 if forced or not expected:
916 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
917 expected=has_drm or expected)
918 else:
919 self.report_warning(msg)
920
921 def parse_outtmpl(self):
922 outtmpl_dict = self.params.get('outtmpl', {})
923 if not isinstance(outtmpl_dict, dict):
924 outtmpl_dict = {'default': outtmpl_dict}
925 # Remove spaces in the default template
926 if self.params.get('restrictfilenames'):
927 sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
928 else:
929 sanitize = lambda x: x
930 outtmpl_dict.update({
931 k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
932 if outtmpl_dict.get(k) is None})
933 for key, val in outtmpl_dict.items():
934 if isinstance(val, bytes):
935 self.report_warning(
936 'Parameter outtmpl is bytes, but should be a unicode string. '
937 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
938 return outtmpl_dict
939
940 def get_output_path(self, dir_type='', filename=None):
941 paths = self.params.get('paths', {})
942 assert isinstance(paths, dict)
943 path = os.path.join(
944 expand_path(paths.get('home', '').strip()),
945 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
946 filename or '')
947
948 # Temporary fix for #4787
949 # 'Treat' all problem characters by passing filename through preferredencoding
950 # to workaround encoding issues with subprocess on python2 @ Windows
951 if sys.version_info < (3, 0) and sys.platform == 'win32':
952 path = encodeFilename(path, True).decode(preferredencoding())
953 return sanitize_path(path, force=self.params.get('windowsfilenames'))
954
955 @staticmethod
956 def _outtmpl_expandpath(outtmpl):
957 # expand_path translates '%%' into '%' and '$$' into '$'
958 # correspondingly that is not what we want since we need to keep
959 # '%%' intact for template dict substitution step. Working around
960 # with boundary-alike separator hack.
961 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
962 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
963
964 # outtmpl should be expand_path'ed before template dict substitution
965 # because meta fields may contain env variables we don't want to
966 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
967 # title "Hello $PATH", we don't want `$PATH` to be expanded.
968 return expand_path(outtmpl).replace(sep, '')
969
970 @staticmethod
971 def escape_outtmpl(outtmpl):
972 ''' Escape any remaining strings like %s, %abc% etc. '''
973 return re.sub(
974 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
975 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
976 outtmpl)
977
978 @classmethod
979 def validate_outtmpl(cls, outtmpl):
980 ''' @return None or Exception object '''
981 outtmpl = re.sub(
982 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
983 lambda mobj: f'{mobj.group(0)[:-1]}s',
984 cls._outtmpl_expandpath(outtmpl))
985 try:
986 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
987 return None
988 except ValueError as err:
989 return err
990
991 @staticmethod
992 def _copy_infodict(info_dict):
993 info_dict = dict(info_dict)
994 for key in ('__original_infodict', '__postprocessors'):
995 info_dict.pop(key, None)
996 return info_dict
997
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

        Each %(...)X field in outtmpl is rewritten to a NUL-delimited key into the
        returned template dict, whose values are the already-evaluated field values.
        sanitize, if given, is a callable (field, value) -> value applied to string-like
        results (it also switches the duration delimiter to '-').
        Returns (rewritten_outtmpl, template_dict).
        """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        # work on a copy so callers' dicts don't grow the derived fields below
        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        # matches the %(...)X fields the user may write in the template
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the `.` in the number pattern below is unescaped, so it
        # matches any single character rather than only a decimal point —
        # presumably r'\.' was intended; confirm before relying on it
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        # inner syntax of one field: [-]fields[+/-math][>strftime][,alternate][|default]
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|)]+)?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # dotted path lookup into info_dict; a leading '.' is ignored
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            """Evaluate one parsed field (groupdict of INTERNAL_FORMAT_RE) to its value."""
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # alternately consume an operator then an operand until exhausted
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        # operand is itself a field name; resolve it
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            # allow sets and LazyLists in %(...)j fields
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            """re.sub callback: evaluate one template field and register it in TMPL_DICT."""
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, default = None, na
            # walk the comma-separated alternates until one yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            # zero-pad the compat fields (see field_size_compat_map above)
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            # NUL-delimited key avoids collisions with '%' in user field names
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1151
1152 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1153 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1154 return self.escape_outtmpl(outtmpl) % info_dict
1155
    def _prepare_filename(self, info_dict, tmpl_type='default'):
        """Evaluate the output template of the given type against info_dict.

        Returns the (not yet path-sanitized) filename, or None when the
        template is invalid.
        """
        try:
            # per-field sanitizer passed down to evaluate_outtmpl; id-like
            # fields get is_id treatment by sanitize_filename
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
            filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)

            # some template types force their own extension (see OUTTMPL_TYPES)
            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if filename and force_ext is not None:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                # NOTE(review): rsplit('.') splits on every dot, so for
                # 'a.b.c.ext' only the first part, the sub-extension and the
                # extension survive trimming — middle parts are dropped;
                # presumably acceptable for --trim-filenames, but verify
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = join_nonempty(fn_groups[0][:trim_file_name], sub_ext, ext, delim='.')

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
1183
1184 def prepare_filename(self, info_dict, dir_type='', warn=False):
1185 """Generate the output filename."""
1186
1187 filename = self._prepare_filename(info_dict, dir_type or 'default')
1188 if not filename and dir_type not in ('', 'temp'):
1189 return ''
1190
1191 if warn:
1192 if not self.params.get('paths'):
1193 pass
1194 elif filename == '-':
1195 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1196 elif os.path.isabs(filename):
1197 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1198 if filename == '-' or not filename:
1199 return filename
1200
1201 return self.get_output_path(dir_type, filename)
1202
    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            # Returns a skip-reason string, or None to accept the entry
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    # new-style filters accept the incomplete keyword
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is not None:
                    return ret
            return None

        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            # --break-on-existing / --break-on-reject abort the whole run
            if self.params.get(break_opt, False):
                raise break_err()
        return reason
1259
1260 @staticmethod
1261 def add_extra_info(info_dict, extra_info):
1262 '''Set the keys from extra_info in info dict if they are missing'''
1263 for key, value in extra_info.items():
1264 info_dict.setdefault(key, value)
1265
    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if extra_info is None:
            extra_info = {}

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = {ie_key: self._get_info_extractor_class(ie_key)}
        else:
            ies = self._ies

        # first suitable extractor wins; extractors are tried in registration order
        for ie_key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                # break skips the for-else below, so an archived hit returns
                # None without reporting an error
                break
            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
        else:
            # no extractor matched the URL at all
            self.report_error('no suitable InfoExtractor for URL %s' % url)
1310
    def __handle_extraction_exceptions(func):
        """Decorator routing extraction exceptions to uniform error reporting.

        Geo-restriction and extractor errors are reported (not raised);
        throttling triggers a retry; cancellations always propagate; other
        exceptions propagate unless 'ignoreerrors' is set.
        """
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except ThrottledDownload as e:
                self.to_stderr('\r')
                self.report_warning(f'{e}; Re-extracting data')
                # retry the whole extraction after being throttled
                return wrapper(self, *args, **kwargs)
            except (DownloadCancelled, LazyList.IndexError):
                raise
            except Exception as e:
                if self.params.get('ignoreerrors'):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper
1337
    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        """Run extractor *ie* on *url* and optionally process/download the result."""
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if extra_info.get('original_url'):
            # keep the URL the user originally passed in, even through redirects
            ie_result.setdefault('original_url', extra_info['original_url'])
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result
1356
1357 def add_default_extra_info(self, ie_result, ie, url):
1358 if url is not None:
1359 self.add_extra_info(ie_result, {
1360 'webpage_url': url,
1361 'original_url': url,
1362 'webpage_url_basename': url_basename(url),
1363 })
1364 if ie is not None:
1365 self.add_extra_info(ie_result, {
1366 'extractor': ie.IE_NAME,
1367 'extractor_key': ie.ie_key(),
1368 })
1369
1370 def process_ie_result(self, ie_result, download=True, extra_info=None):
1371 """
1372 Take the result of the ie(may be modified) and resolve all unresolved
1373 references (URLs, playlist items).
1374
1375 It will also download the videos if 'download'.
1376 Returns the resolved ie_result.
1377 """
1378 if extra_info is None:
1379 extra_info = {}
1380 result_type = ie_result.get('_type', 'video')
1381
1382 if result_type in ('url', 'url_transparent'):
1383 ie_result['url'] = sanitize_url(ie_result['url'])
1384 if ie_result.get('original_url'):
1385 extra_info.setdefault('original_url', ie_result['original_url'])
1386
1387 extract_flat = self.params.get('extract_flat', False)
1388 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1389 or extract_flat is True):
1390 info_copy = ie_result.copy()
1391 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1392 if ie and not ie_result.get('id'):
1393 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1394 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1395 self.add_extra_info(info_copy, extra_info)
1396 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1397 if self.params.get('force_write_download_archive', False):
1398 self.record_download_archive(info_copy)
1399 return ie_result
1400
1401 if result_type == 'video':
1402 self.add_extra_info(ie_result, extra_info)
1403 ie_result = self.process_video_result(ie_result, download=download)
1404 additional_urls = (ie_result or {}).get('additional_urls')
1405 if additional_urls:
1406 # TODO: Improve MetadataParserPP to allow setting a list
1407 if isinstance(additional_urls, compat_str):
1408 additional_urls = [additional_urls]
1409 self.to_screen(
1410 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1411 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1412 ie_result['additional_entries'] = [
1413 self.extract_info(
1414 url, download, extra_info,
1415 force_generic_extractor=self.params.get('force_generic_extractor'))
1416 for url in additional_urls
1417 ]
1418 return ie_result
1419 elif result_type == 'url':
1420 # We have to add extra_info to the results because it may be
1421 # contained in a playlist
1422 return self.extract_info(
1423 ie_result['url'], download,
1424 ie_key=ie_result.get('ie_key'),
1425 extra_info=extra_info)
1426 elif result_type == 'url_transparent':
1427 # Use the information from the embedding page
1428 info = self.extract_info(
1429 ie_result['url'], ie_key=ie_result.get('ie_key'),
1430 extra_info=extra_info, download=False, process=False)
1431
1432 # extract_info may return None when ignoreerrors is enabled and
1433 # extraction failed with an error, don't crash and return early
1434 # in this case
1435 if not info:
1436 return info
1437
1438 force_properties = dict(
1439 (k, v) for k, v in ie_result.items() if v is not None)
1440 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1441 if f in force_properties:
1442 del force_properties[f]
1443 new_result = info.copy()
1444 new_result.update(force_properties)
1445
1446 # Extracted info may not be a video result (i.e.
1447 # info.get('_type', 'video') != video) but rather an url or
1448 # url_transparent. In such cases outer metadata (from ie_result)
1449 # should be propagated to inner one (info). For this to happen
1450 # _type of info should be overridden with url_transparent. This
1451 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1452 if new_result.get('_type') == 'url':
1453 new_result['_type'] = 'url_transparent'
1454
1455 return self.process_ie_result(
1456 new_result, download=download, extra_info=extra_info)
1457 elif result_type in ('playlist', 'multi_video'):
1458 # Protect from infinite recursion due to recursively nested playlists
1459 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1460 webpage_url = ie_result['webpage_url']
1461 if webpage_url in self._playlist_urls:
1462 self.to_screen(
1463 '[download] Skipping already downloaded playlist: %s'
1464 % ie_result.get('title') or ie_result.get('id'))
1465 return
1466
1467 self._playlist_level += 1
1468 self._playlist_urls.add(webpage_url)
1469 self._sanitize_thumbnails(ie_result)
1470 try:
1471 return self.__process_playlist(ie_result, download)
1472 finally:
1473 self._playlist_level -= 1
1474 if not self._playlist_level:
1475 self._playlist_urls.clear()
1476 elif result_type == 'compat_list':
1477 self.report_warning(
1478 'Extractor %s returned a compat_list result. '
1479 'It needs to be updated.' % ie_result.get('extractor'))
1480
1481 def _fixup(r):
1482 self.add_extra_info(r, {
1483 'extractor': ie_result['extractor'],
1484 'webpage_url': ie_result['webpage_url'],
1485 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1486 'extractor_key': ie_result['extractor_key'],
1487 })
1488 return r
1489 ie_result['entries'] = [
1490 self.process_ie_result(_fixup(r), download, extra_info)
1491 for r in ie_result['entries']
1492 ]
1493 return ie_result
1494 else:
1495 raise Exception('Invalid result type: %s' % result_type)
1496
1497 def _ensure_dir_exists(self, path):
1498 return make_dir(path, self.report_error)
1499
1500 def __process_playlist(self, ie_result, download):
1501 # We process each entry in the playlist
1502 playlist = ie_result.get('title') or ie_result.get('id')
1503 self.to_screen('[download] Downloading playlist: %s' % playlist)
1504
1505 if 'entries' not in ie_result:
1506 raise EntryNotInPlaylist('There are no entries')
1507 incomplete_entries = bool(ie_result.get('requested_entries'))
1508 if incomplete_entries:
1509 def fill_missing_entries(entries, indexes):
1510 ret = [None] * max(*indexes)
1511 for i, entry in zip(indexes, entries):
1512 ret[i - 1] = entry
1513 return ret
1514 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1515
1516 playlist_results = []
1517
1518 playliststart = self.params.get('playliststart', 1)
1519 playlistend = self.params.get('playlistend')
1520 # For backwards compatibility, interpret -1 as whole list
1521 if playlistend == -1:
1522 playlistend = None
1523
1524 playlistitems_str = self.params.get('playlist_items')
1525 playlistitems = None
1526 if playlistitems_str is not None:
1527 def iter_playlistitems(format):
1528 for string_segment in format.split(','):
1529 if '-' in string_segment:
1530 start, end = string_segment.split('-')
1531 for item in range(int(start), int(end) + 1):
1532 yield int(item)
1533 else:
1534 yield int(string_segment)
1535 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1536
1537 ie_entries = ie_result['entries']
1538 msg = (
1539 'Downloading %d videos' if not isinstance(ie_entries, list)
1540 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1541
1542 if isinstance(ie_entries, list):
1543 def get_entry(i):
1544 return ie_entries[i - 1]
1545 else:
1546 if not isinstance(ie_entries, (PagedList, LazyList)):
1547 ie_entries = LazyList(ie_entries)
1548
1549 def get_entry(i):
1550 return YoutubeDL.__handle_extraction_exceptions(
1551 lambda self, i: ie_entries[i - 1]
1552 )(self, i)
1553
1554 entries = []
1555 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1556 for i in items:
1557 if i == 0:
1558 continue
1559 if playlistitems is None and playlistend is not None and playlistend < i:
1560 break
1561 entry = None
1562 try:
1563 entry = get_entry(i)
1564 if entry is None:
1565 raise EntryNotInPlaylist()
1566 except (IndexError, EntryNotInPlaylist):
1567 if incomplete_entries:
1568 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
1569 elif not playlistitems:
1570 break
1571 entries.append(entry)
1572 try:
1573 if entry is not None:
1574 self._match_entry(entry, incomplete=True, silent=True)
1575 except (ExistingVideoReached, RejectedVideoReached):
1576 break
1577 ie_result['entries'] = entries
1578
1579 # Save playlist_index before re-ordering
1580 entries = [
1581 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1582 for i, entry in enumerate(entries, 1)
1583 if entry is not None]
1584 n_entries = len(entries)
1585
1586 if not playlistitems and (playliststart or playlistend):
1587 playlistitems = list(range(playliststart, playliststart + n_entries))
1588 ie_result['requested_entries'] = playlistitems
1589
1590 if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
1591 ie_copy = {
1592 'playlist': playlist,
1593 'playlist_id': ie_result.get('id'),
1594 'playlist_title': ie_result.get('title'),
1595 'playlist_uploader': ie_result.get('uploader'),
1596 'playlist_uploader_id': ie_result.get('uploader_id'),
1597 'playlist_index': 0,
1598 'n_entries': n_entries,
1599 }
1600 ie_copy.update(dict(ie_result))
1601
1602 if self._write_info_json('playlist', ie_result,
1603 self.prepare_filename(ie_copy, 'pl_infojson')) is None:
1604 return
1605 if self._write_description('playlist', ie_result,
1606 self.prepare_filename(ie_copy, 'pl_description')) is None:
1607 return
1608 # TODO: This should be passed to ThumbnailsConvertor if necessary
1609 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1610
1611 if self.params.get('playlistreverse', False):
1612 entries = entries[::-1]
1613 if self.params.get('playlistrandom', False):
1614 random.shuffle(entries)
1615
1616 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1617
1618 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1619 failures = 0
1620 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1621 for i, entry_tuple in enumerate(entries, 1):
1622 playlist_index, entry = entry_tuple
1623 if 'playlist-index' in self.params.get('compat_opts', []):
1624 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1625 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1626 # This __x_forwarded_for_ip thing is a bit ugly but requires
1627 # minimal changes
1628 if x_forwarded_for:
1629 entry['__x_forwarded_for_ip'] = x_forwarded_for
1630 extra = {
1631 'n_entries': n_entries,
1632 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1633 'playlist_index': playlist_index,
1634 'playlist_autonumber': i,
1635 'playlist': playlist,
1636 'playlist_id': ie_result.get('id'),
1637 'playlist_title': ie_result.get('title'),
1638 'playlist_uploader': ie_result.get('uploader'),
1639 'playlist_uploader_id': ie_result.get('uploader_id'),
1640 'extractor': ie_result['extractor'],
1641 'webpage_url': ie_result['webpage_url'],
1642 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1643 'extractor_key': ie_result['extractor_key'],
1644 }
1645
1646 if self._match_entry(entry, incomplete=True) is not None:
1647 continue
1648
1649 entry_result = self.__process_iterable_entry(entry, download, extra)
1650 if not entry_result:
1651 failures += 1
1652 if failures >= max_failures:
1653 self.report_error(
1654 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1655 break
1656 # TODO: skip failed (empty) entries?
1657 playlist_results.append(entry_result)
1658 ie_result['entries'] = playlist_results
1659 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1660 return ie_result
1661
1662 @__handle_extraction_exceptions
1663 def __process_iterable_entry(self, entry, download, extra_info):
1664 return self.process_ie_result(
1665 entry, download=download, extra_info=extra_info)
1666
1667 def _build_format_filter(self, filter_spec):
1668 " Returns a function to filter the formats according to the filter_spec "
1669
1670 OPERATORS = {
1671 '<': operator.lt,
1672 '<=': operator.le,
1673 '>': operator.gt,
1674 '>=': operator.ge,
1675 '=': operator.eq,
1676 '!=': operator.ne,
1677 }
1678 operator_rex = re.compile(r'''(?x)\s*
1679 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1680 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1681 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1682 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1683 m = operator_rex.fullmatch(filter_spec)
1684 if m:
1685 try:
1686 comparison_value = int(m.group('value'))
1687 except ValueError:
1688 comparison_value = parse_filesize(m.group('value'))
1689 if comparison_value is None:
1690 comparison_value = parse_filesize(m.group('value') + 'B')
1691 if comparison_value is None:
1692 raise ValueError(
1693 'Invalid value %r in format specification %r' % (
1694 m.group('value'), filter_spec))
1695 op = OPERATORS[m.group('op')]
1696
1697 if not m:
1698 STR_OPERATORS = {
1699 '=': operator.eq,
1700 '^=': lambda attr, value: attr.startswith(value),
1701 '$=': lambda attr, value: attr.endswith(value),
1702 '*=': lambda attr, value: value in attr,
1703 }
1704 str_operator_rex = re.compile(r'''(?x)\s*
1705 (?P<key>[a-zA-Z0-9._-]+)\s*
1706 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1707 (?P<value>[a-zA-Z0-9._-]+)\s*
1708 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1709 m = str_operator_rex.fullmatch(filter_spec)
1710 if m:
1711 comparison_value = m.group('value')
1712 str_op = STR_OPERATORS[m.group('op')]
1713 if m.group('negation'):
1714 op = lambda attr, value: not str_op(attr, value)
1715 else:
1716 op = str_op
1717
1718 if not m:
1719 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1720
1721 def _filter(f):
1722 actual_value = f.get(m.group('key'))
1723 if actual_value is None:
1724 return m.group('none_inclusive')
1725 return op(actual_value, comparison_value)
1726 return _filter
1727
1728 def _check_formats(self, formats):
1729 for f in formats:
1730 self.to_screen('[info] Testing format %s' % f['format_id'])
1731 temp_file = tempfile.NamedTemporaryFile(
1732 suffix='.tmp', delete=False,
1733 dir=self.get_output_path('temp') or None)
1734 temp_file.close()
1735 try:
1736 success, _ = self.dl(temp_file.name, f, test=True)
1737 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1738 success = False
1739 finally:
1740 if os.path.exists(temp_file.name):
1741 try:
1742 os.remove(temp_file.name)
1743 except OSError:
1744 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1745 if success:
1746 yield f
1747 else:
1748 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1749
1750 def _default_format_spec(self, info_dict, download=True):
1751
1752 def can_merge():
1753 merger = FFmpegMergerPP(self)
1754 return merger.available and merger.can_merge()
1755
1756 prefer_best = (
1757 not self.params.get('simulate')
1758 and download
1759 and (
1760 not can_merge()
1761 or info_dict.get('is_live', False)
1762 or self.outtmpl_dict['default'] == '-'))
1763 compat = (
1764 prefer_best
1765 or self.params.get('allow_multiple_audio_streams', False)
1766 or 'format-spec' in self.params.get('compat_opts', []))
1767
1768 return (
1769 'best/bestvideo+bestaudio' if prefer_best
1770 else 'bestvideo*+bestaudio/best' if not compat
1771 else 'bestvideo+bestaudio/best')
1772
    def build_format_selector(self, format_spec):
        """Parse *format_spec* (e.g. 'bestvideo*+bestaudio/best') and return a
        selector function.

        The returned function takes a ctx dict with keys 'formats' and
        'incomplete_formats' and returns an iterable of the selected format
        dicts (merged formats carry a 'requested_formats' list).
        Raises SyntaxError on an invalid spec.
        """
        def syntax_error(note, start):
            # start is a tokenizer (row, col) pair; col positions the caret
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        # Only test-download selected formats when check_formats == 'selected'
        check_formats = self.params.get('check_formats') == 'selected'

        def _parse_filter(tokens):
            # Join everything up to the closing ']' into one filter string
            # (returns None implicitly if the ']' is missing)
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Fuse adjacent names/numbers/unknown operators into one NAME
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parse of the token stream into FormatSelector
            # nodes; the inside_* flags control which operators end the
            # current (sub-)expression
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _merge(formats_pair):
            # Combine two selected formats (e.g. video+audio) into one dict
            # describing the merged download
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                # Drop surplus audio/video streams (and storyboard-like
                # formats with neither codec) when multi-stream is disallowed
                get_no_more = {'video': False, 'audio': False}
                for (i, fmt_info) in enumerate(formats_info):
                    if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                        formats_info.pop(i)
                        continue
                    for aud_vid in ['audio', 'video']:
                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                            if get_no_more[aud_vid]:
                                formats_info.pop(i)
                                break
                            get_no_more[aud_vid] = True

            if len(formats_info) == 1:
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            output_ext = self.params.get('merge_output_format')
            if not output_ext:
                if the_only_video:
                    output_ext = the_only_video['ext']
                elif the_only_audio and not video_fmts:
                    output_ext = the_only_audio['ext']
                else:
                    output_ext = 'mkv'

            filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(filtered('format')),
                'format_id': '+'.join(filtered('format_id')),
                'ext': output_ext,
                'protocol': '+'.join(map(determine_protocol, formats_info)),
                'language': '+'.join(orderedSet(filtered('language'))) or None,
                'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
                'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
                'tbr': sum(filtered('tbr', 'vbr', 'abr')),
            }

            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                    'fps': the_only_video.get('fps'),
                    'dynamic_range': the_only_video.get('dynamic_range'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                    'asr': the_only_audio.get('asr'),
                })

            return new_dict

        def _check_formats(formats):
            # Pass-through unless check_formats == 'selected' (see above)
            if not check_formats:
                yield from formats
                return
            yield from self._check_formats(formats)

        def _build_selector_function(selector):
            # Compile a FormatSelector tree into a ctx -> formats function
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Deep copies so each side's filtering can't affect the other
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        # Fold all formats into one merged dict, worst first
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    # best/worst selectors like b, bv*, ba.2, worstaudio, ...
                    format_fallback, format_reverse, format_idx = False, True, 1
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        # Not a best/worst selector: match by extension or exact format_id
                        if format_spec in self._format_selection_exts['audio']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in self._format_selection_exts['video']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        elif format_spec in self._format_selection_exts['storyboards']:
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = lambda f: f.get('format_id') == format_spec  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            return

            # Apply the [...] filters on a deep copy before running the selector
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over the token list with one-token "push back" support
            # (restore_last_token), needed by the recursive parser
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
2098
2099 def _calc_headers(self, info_dict):
2100 res = std_headers.copy()
2101
2102 add_headers = info_dict.get('http_headers')
2103 if add_headers:
2104 res.update(add_headers)
2105
2106 cookies = self._calc_cookies(info_dict)
2107 if cookies:
2108 res['Cookie'] = cookies
2109
2110 if 'X-Forwarded-For' not in res:
2111 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2112 if x_forwarded_for_ip:
2113 res['X-Forwarded-For'] = x_forwarded_for_ip
2114
2115 return res
2116
2117 def _calc_cookies(self, info_dict):
2118 pr = sanitized_Request(info_dict['url'])
2119 self.cookiejar.add_cookie_header(pr)
2120 return pr.get_header('Cookie')
2121
2122 def _sort_thumbnails(self, thumbnails):
2123 thumbnails.sort(key=lambda t: (
2124 t.get('preference') if t.get('preference') is not None else -1,
2125 t.get('width') if t.get('width') is not None else -1,
2126 t.get('height') if t.get('height') is not None else -1,
2127 t.get('id') if t.get('id') is not None else '',
2128 t.get('url')))
2129
2130 def _sanitize_thumbnails(self, info_dict):
2131 thumbnails = info_dict.get('thumbnails')
2132 if thumbnails is None:
2133 thumbnail = info_dict.get('thumbnail')
2134 if thumbnail:
2135 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2136 if not thumbnails:
2137 return
2138
2139 def check_thumbnails(thumbnails):
2140 for t in thumbnails:
2141 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2142 try:
2143 self.urlopen(HEADRequest(t['url']))
2144 except network_exceptions as err:
2145 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2146 continue
2147 yield t
2148
2149 self._sort_thumbnails(thumbnails)
2150 for i, t in enumerate(thumbnails):
2151 if t.get('id') is None:
2152 t['id'] = '%d' % i
2153 if t.get('width') and t.get('height'):
2154 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2155 t['url'] = sanitize_url(t['url'])
2156
2157 if self.params.get('check_formats') is True:
2158 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse()
2159 else:
2160 info_dict['thumbnails'] = thumbnails
2161
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single 'video' extractor result in-place (ids, dates,
        thumbnails, subtitles, formats), run format selection and, when
        *download* is true, trigger processing/download of each selected
        format. Returns the (modified) info_dict."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a mis-typed field to str, warning about the extractor bug
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce mis-typed numeric fields to int (None if unparseable)
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        # Keep 'thumbnail' and 'thumbnails' consistent (last entry is used
        # as the single thumbnail; _sanitize_thumbnails sorted worst-first)
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])

        # Derive the date fields from their timestamp counterparts when missing
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Derive live_status from is_live/was_live (and vice versa)
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                    break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize URLs and fill in extensions for all subtitle formats
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # Drop DRM-protected formats unless explicitly allowed
        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        common_exts = set(itertools.chain(*self._format_selection_exts.values()))
        for format_id, ambiguous_formats in formats_dict.items():
            ambigious_id = len(ambiguous_formats) > 1
            for i, format in enumerate(ambiguous_formats):
                if ambigious_id:
                    format['format_id'] = '%s-%d' % (format_id, i)
                if format.get('ext') is None:
                    format['ext'] = determine_ext(format['url']).lower()
                # Ensure there is no conflict between id and ext in format selection
                # See https://github.com/yt-dlp/yt-dlp/issues/1282
                if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                    format['format_id'] = 'f%s' % format['format_id']

        # Fill in derived per-format fields (description, protocol,
        # resolution, dynamic range, approximate filesize, HTTP headers)
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            if format.get('resolution') is None:
                format['resolution'] = self.format_resolution(format, default=None)
            if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                format['dynamic_range'] = 'SDR'
            if (info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                # tbr is in KBit/s; duration * tbr * 1024 / 8 gives bytes
                format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)

            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if self.params.get('check_formats') is True:
            formats = LazyList(self._check_formats(formats[::-1])).reverse()

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        # The pre-processors may have modified the formats
        formats = info_dict.get('formats', [info_dict])

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        # NOTE: any listing option implies list-only, unless 'simulate' was
        # explicitly set (is not None)
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the selected best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2442
    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
        """Select the requested subtitles and their format.

        Merges normal subtitles and automatic captions (normal subtitles win
        for a language present in both), picks the languages according to
        'allsubtitles'/'subtitleslangs', then picks one format per language
        according to 'subtitlesformat'. Returns a dict lang -> format dict,
        or None when subtitle writing was not requested or nothing is available.
        """
        available_subs = {}
        if normal_subtitles and self.params.get('writesubtitles'):
            available_subs.update(normal_subtitles)
        if automatic_captions and self.params.get('writeautomaticsub'):
            # Automatic captions only fill languages not already covered
            # by normal subtitles
            for lang, cap_info in automatic_captions.items():
                if lang not in available_subs:
                    available_subs[lang] = cap_info

        if (not self.params.get('writesubtitles') and not
                self.params.get('writeautomaticsub') or not
                available_subs):
            return None

        all_sub_langs = available_subs.keys()
        if self.params.get('allsubtitles', False):
            requested_langs = all_sub_langs
        elif self.params.get('subtitleslangs', False):
            # A list is used so that the order of languages will be the same as
            # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
            requested_langs = []
            for lang_re in self.params.get('subtitleslangs'):
                if lang_re == 'all':
                    requested_langs.extend(all_sub_langs)
                    continue
                # A leading '-' means: remove the matching languages from
                # the selection built so far
                discard = lang_re[0] == '-'
                if discard:
                    lang_re = lang_re[1:]
                # Each entry is a regex anchored at the end of the language code
                current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
                if discard:
                    for lang in current_langs:
                        # Remove every earlier occurrence, not just the first
                        while lang in requested_langs:
                            requested_langs.remove(lang)
                else:
                    requested_langs.extend(current_langs)
            requested_langs = orderedSet(requested_langs)
        elif 'en' in available_subs:
            # Default: prefer English if present...
            requested_langs = ['en']
        else:
            # ...otherwise fall back to the first available language
            requested_langs = [list(all_sub_langs)[0]]
        if requested_langs:
            self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

        formats_query = self.params.get('subtitlesformat', 'best')
        formats_preference = formats_query.split('/') if formats_query else []
        subs = {}
        for lang in requested_langs:
            formats = available_subs.get(lang)
            if formats is None:
                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
                continue
            # Walk the '/'-separated preference list; 'best' takes the last
            # entry of the format list (assumed ordered worst-to-best — the
            # 'best' fallback below relies on the same assumption)
            for ext in formats_preference:
                if ext == 'best':
                    f = formats[-1]
                    break
                matches = list(filter(lambda f: f['ext'] == ext, formats))
                if matches:
                    f = matches[-1]
                    break
            else:
                # No preference matched: fall back to the last format
                f = formats[-1]
                self.report_warning(
                    'No subtitle format found matching "%s" for language %s, '
                    'using %s' % (formats_query, lang, f['ext']))
            subs[lang] = f
        return subs
2510
    def __forced_printings(self, info_dict, filename, incomplete):
        """Print the fields requested via --force* / --print options.

        Works on a shallow copy of info_dict; print order is part of the
        observable behavior (forceprint templates first, then the fixed
        sequence of force* fields, then forcejson).
        """
        def print_mandatory(field, actual_field=None):
            # A "mandatory" field is printed even when missing (raising
            # KeyError) unless the info is known to be incomplete
            if actual_field is None:
                actual_field = field
            if (self.params.get('force%s' % field, False)
                    and (not incomplete or info_dict.get(actual_field) is not None)):
                self.to_stdout(info_dict[actual_field])

        def print_optional(field):
            # Optional fields are silently skipped when absent
            if (self.params.get('force%s' % field, False)
                    and info_dict.get(field) is not None):
                self.to_stdout(info_dict[field])

        info_dict = info_dict.copy()
        if filename is not None:
            info_dict['filename'] = filename
        if info_dict.get('requested_formats') is not None:
            # For RTMP URLs, also include the playpath
            info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
        elif 'url' in info_dict:
            info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

        if self.params.get('forceprint') or self.params.get('forcejson'):
            self.post_extract(info_dict)
        for tmpl in self.params.get('forceprint', []):
            # Bare 'field' becomes '%(field)s'; 'field=' becomes
            # 'field = %(field)s'; anything else is used as-is
            mobj = re.match(r'\w+(=?)$', tmpl)
            if mobj and mobj.group(1):
                tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
            elif mobj:
                tmpl = '%({})s'.format(tmpl)
            self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))

        print_mandatory('title')
        print_mandatory('id')
        print_mandatory('url', 'urls')
        print_optional('thumbnail')
        print_optional('description')
        print_optional('filename')
        if self.params.get('forceduration') and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        print_mandatory('format')

        if self.params.get('forcejson'):
            self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2555
2556 def dl(self, name, info, subtitle=False, test=False):
2557 if not info.get('url'):
2558 self.raise_no_formats(info, True)
2559
2560 if test:
2561 verbose = self.params.get('verbose')
2562 params = {
2563 'test': True,
2564 'quiet': self.params.get('quiet') or not verbose,
2565 'verbose': verbose,
2566 'noprogress': not verbose,
2567 'nopart': True,
2568 'skip_unavailable_fragments': False,
2569 'keep_fragments': False,
2570 'overwrites': True,
2571 '_no_ytdl_file': True,
2572 }
2573 else:
2574 params = self.params
2575 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2576 if not test:
2577 for ph in self._progress_hooks:
2578 fd.add_progress_hook(ph)
2579 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2580 self.write_debug('Invoking downloader on "%s"' % urls)
2581
2582 new_info = copy.deepcopy(self._copy_infodict(info))
2583 if new_info.get('http_headers') is None:
2584 new_info['http_headers'] = self._calc_headers(new_info)
2585 return fd.download(name, new_info, subtitle)
2586
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Writes all requested side files (description, subtitles, thumbnails,
        info-json, annotations, internet shortcuts), downloads the media
        (unless simulating or skipping), queues fixup postprocessors and runs
        post-processing. Raises MaxDownloadsReached when the download limit
        has been hit.
        """

        assert info_dict.get('_type', 'video') == 'video'

        # Bail out before doing any work if the limit was already reached
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # TODO: backward compatibility, to be removed
        info_dict['fulltitle'] = info_dict['title']

        if 'format' not in info_dict and 'ext' in info_dict:
            info_dict['format'] = info_dict['ext']

        # A non-None return value means the entry should be skipped
        if self._match_entry(info_dict) is not None:
            return

        self.post_extract(info_dict)
        self._num_downloads += 1

        # info_dict['_filename'] needs to be set for backward compatibility
        info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
        temp_filename = self.prepare_filename(info_dict, 'temp')
        # Maps temporary path -> final path (None = delete/ignore)
        files_to_move = {}

        # Forced printings
        self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

        if self.params.get('simulate'):
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_dict)
            # Do nothing else if in simulate mode
            return

        if full_filename is None:
            return
        if not self._ensure_dir_exists(encodeFilename(full_filename)):
            return
        if not self._ensure_dir_exists(encodeFilename(temp_filename)):
            return

        # The _write_* helpers return None on fatal errors
        if self._write_description('video', info_dict,
                                   self.prepare_filename(info_dict, 'description')) is None:
            return

        sub_files = self._write_subtitles(info_dict, temp_filename)
        if sub_files is None:
            return
        files_to_move.update(dict(sub_files))

        thumb_files = self._write_thumbnails(
            'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
        if thumb_files is None:
            return
        files_to_move.update(dict(thumb_files))

        infofn = self.prepare_filename(info_dict, 'infojson')
        _infojson_written = self._write_info_json('video', info_dict, infofn)
        if _infojson_written:
            info_dict['__infojson_filename'] = infofn
        elif _infojson_written is None:
            return

        # Note: Annotations are deprecated
        annofn = None
        if self.params.get('writeannotations', False):
            annofn = self.prepare_filename(info_dict, 'annotation')
        if annofn:
            if not self._ensure_dir_exists(encodeFilename(annofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            elif not info_dict.get('annotations'):
                self.report_warning('There are no annotations to write.')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        # Write internet shortcut files
        def _write_link_file(link_type):
            # Returns False on failure; True on success or when skipped
            if 'webpage_url' not in info_dict:
                self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
                return False
            linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
            if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
                self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
                return True
            try:
                self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
                # .url files require CRLF line endings
                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                             newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                    template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
                    if link_type == 'desktop':
                        template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                    linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
            except (OSError, IOError):
                self.report_error(f'Cannot write internet shortcut {linkfn}')
                return False
            return True

        write_links = {
            'url': self.params.get('writeurllink'),
            'webloc': self.params.get('writewebloclink'),
            'desktop': self.params.get('writedesktoplink'),
        }
        if self.params.get('writelink'):
            # Pick the platform-native shortcut type
            link_type = ('webloc' if sys.platform == 'darwin'
                         else 'desktop' if sys.platform.startswith('linux')
                         else 'url')
            write_links[link_type] = True

        if any(should_write and not _write_link_file(link_type)
               for link_type, should_write in write_links.items()):
            return

        try:
            info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
        except PostProcessingError as err:
            self.report_error('Preprocessing: %s' % str(err))
            return

        must_record_download_archive = False
        if self.params.get('skip_download', False):
            info_dict['filepath'] = temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
            info_dict['__files_to_move'] = files_to_move
            info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
        else:
            # Download
            info_dict.setdefault('__postprocessors', [])
            try:

                def existing_file(*filepaths):
                    # Return an already-downloaded file to reuse, or None
                    # after deleting whatever exists (when overwriting)
                    ext = info_dict.get('ext')
                    final_ext = self.params.get('final_ext', ext)
                    existing_files = []
                    for file in orderedSet(filepaths):
                        if final_ext != ext:
                            # A postprocessor may already have converted the file
                            converted = replace_extension(file, final_ext, ext)
                            if os.path.exists(encodeFilename(converted)):
                                existing_files.append(converted)
                        if os.path.exists(encodeFilename(file)):
                            existing_files.append(file)

                    if not existing_files or self.params.get('overwrites', False):
                        for file in orderedSet(existing_files):
                            self.report_file_delete(file)
                            os.remove(encodeFilename(file))
                        return None

                    info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                    return existing_files[0]

                success = True
                if info_dict.get('requested_formats') is not None:

                    def compatible_formats(formats):
                        # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                        video_formats = [format for format in formats if format.get('vcodec') != 'none']
                        audio_formats = [format for format in formats if format.get('acodec') != 'none']
                        if len(video_formats) > 2 or len(audio_formats) > 2:
                            return False

                        # Check extension
                        exts = set(format.get('ext') for format in formats)
                        COMPATIBLE_EXTS = (
                            set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                            set(('webm',)),
                        )
                        for ext_sets in COMPATIBLE_EXTS:
                            if ext_sets.issuperset(exts):
                                return True
                        # TODO: Check acodec/vcodec
                        return False

                    requested_formats = info_dict['requested_formats']
                    old_ext = info_dict['ext']
                    if self.params.get('merge_output_format') is None:
                        if not compatible_formats(requested_formats):
                            info_dict['ext'] = 'mkv'
                            self.report_warning(
                                'Requested formats are incompatible for merge and will be merged into mkv')
                        if (info_dict['ext'] == 'webm'
                                and info_dict.get('thumbnails')
                                # check with type instead of pp_key, __name__, or isinstance
                                # since we dont want any custom PPs to trigger this
                                and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
                            info_dict['ext'] = 'mkv'
                            self.report_warning(
                                'webm doesn\'t support embedding a thumbnail, mkv will be used')
                    new_ext = info_dict['ext']

                    def correct_ext(filename, ext=new_ext):
                        if filename == '-':
                            return filename
                        filename_real_ext = os.path.splitext(filename)[1][1:]
                        filename_wo_ext = (
                            os.path.splitext(filename)[0]
                            if filename_real_ext in (old_ext, new_ext)
                            else filename)
                        return '%s.%s' % (filename_wo_ext, ext)

                    # Ensure filename always has a correct extension for successful merge
                    full_filename = correct_ext(full_filename)
                    temp_filename = correct_ext(temp_filename)
                    dl_filename = existing_file(full_filename, temp_filename)
                    info_dict['__real_download'] = False

                    if dl_filename is not None:
                        self.report_file_already_downloaded(dl_filename)
                    elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
                        # A single downloader can handle all the formats at once
                        info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                    else:
                        # Download each format separately and merge afterwards
                        downloaded = []
                        merger = FFmpegMergerPP(self)
                        if self.params.get('allow_unplayable_formats'):
                            self.report_warning(
                                'You have requested merging of multiple formats '
                                'while also allowing unplayable formats to be downloaded. '
                                'The formats won\'t be merged to prevent data corruption.')
                        elif not merger.available:
                            self.report_warning(
                                'You have requested merging of multiple formats but ffmpeg is not installed. '
                                'The formats won\'t be merged.')

                        if temp_filename == '-':
                            reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
                                      else 'but the formats are incompatible for simultaneous download' if merger.available
                                      else 'but ffmpeg is not installed')
                            self.report_warning(
                                f'You have requested downloading multiple formats to stdout {reason}. '
                                'The formats will be streamed one after the other')
                        fname = temp_filename
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            del new_info['requested_formats']
                            new_info.update(f)
                            if temp_filename != '-':
                                # Each format gets a 'f<format_id>' infix
                                fname = prepend_extension(
                                    correct_ext(temp_filename, new_info['ext']),
                                    'f%s' % f['format_id'], new_info['ext'])
                                if not self._ensure_dir_exists(fname):
                                    return
                                f['filepath'] = fname
                                downloaded.append(fname)
                            partial_success, real_download = self.dl(fname, new_info)
                            info_dict['__real_download'] = info_dict['__real_download'] or real_download
                            success = success and partial_success
                        if merger.available and not self.params.get('allow_unplayable_formats'):
                            info_dict['__postprocessors'].append(merger)
                            info_dict['__files_to_merge'] = downloaded
                            # Even if there were no downloads, it is being merged only now
                            info_dict['__real_download'] = True
                        else:
                            for file in downloaded:
                                files_to_move[file] = None
                else:
                    # Just a single file
                    dl_filename = existing_file(full_filename, temp_filename)
                    if dl_filename is None or dl_filename == temp_filename:
                        # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                        # So we should try to resume the download
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                    else:
                        self.report_file_already_downloaded(dl_filename)

                dl_filename = dl_filename or temp_filename
                info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

            except network_exceptions as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success and full_filename != '-':

                def fixup():
                    # Queue ffmpeg-based fixup postprocessors (or just warn,
                    # depending on the 'fixup' policy)
                    do_fixup = True
                    fixup_policy = self.params.get('fixup')
                    vid = info_dict['id']

                    if fixup_policy in ('ignore', 'never'):
                        return
                    elif fixup_policy == 'warn':
                        do_fixup = False
                    elif fixup_policy != 'force':
                        assert fixup_policy in ('detect_or_warn', None)
                        if not info_dict.get('__real_download'):
                            do_fixup = False

                    def ffmpeg_fixup(cndn, msg, cls):
                        if not cndn:
                            return
                        if not do_fixup:
                            self.report_warning(f'{vid}: {msg}')
                            return
                        pp = cls(self)
                        if pp.available:
                            info_dict['__postprocessors'].append(pp)
                        else:
                            self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                    stretched_ratio = info_dict.get('stretched_ratio')
                    ffmpeg_fixup(
                        stretched_ratio not in (1, None),
                        f'Non-uniform pixel ratio {stretched_ratio}',
                        FFmpegFixupStretchedPP)

                    ffmpeg_fixup(
                        (info_dict.get('requested_formats') is None
                         and info_dict.get('container') == 'm4a_dash'
                         and info_dict.get('ext') == 'm4a'),
                        'writing DASH m4a. Only some players support this container',
                        FFmpegFixupM4aPP)

                    downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                    downloader = downloader.__name__ if downloader else None
                    ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
                                 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
                    ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
                    ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)

                fixup()
                try:
                    info_dict = self.post_process(dl_filename, info_dict, files_to_move)
                except PostProcessingError as err:
                    self.report_error('Postprocessing: %s' % str(err))
                    return
                try:
                    for ph in self._post_hooks:
                        ph(info_dict['filepath'])
                except Exception as err:
                    self.report_error('post hooks: %s' % str(err))
                    return
                must_record_download_archive = True

        if must_record_download_archive or self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)
        # Re-check the limit after this download has been counted
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None and self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()
2944
2945 def __download_wrapper(self, func):
2946 @functools.wraps(func)
2947 def wrapper(*args, **kwargs):
2948 try:
2949 res = func(*args, **kwargs)
2950 except UnavailableVideoError as e:
2951 self.report_error(e)
2952 except DownloadCancelled as e:
2953 self.to_screen(f'[info] {e}')
2954 raise
2955 else:
2956 if self.params.get('dump_single_json', False):
2957 self.post_extract(res)
2958 self.to_stdout(json.dumps(self.sanitize_info(res)))
2959 return wrapper
2960
2961 def download(self, url_list):
2962 """Download a given list of URLs."""
2963 url_list = variadic(url_list) # Passing a single URL is a common mistake
2964 outtmpl = self.outtmpl_dict['default']
2965 if (len(url_list) > 1
2966 and outtmpl != '-'
2967 and '%' not in outtmpl
2968 and self.params.get('max_downloads') != 1):
2969 raise SameFileError(outtmpl)
2970
2971 for url in url_list:
2972 self.__download_wrapper(self.extract_info)(
2973 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2974
2975 return self._download_retcode
2976
2977 def download_with_info_file(self, info_filename):
2978 with contextlib.closing(fileinput.FileInput(
2979 [info_filename], mode='r',
2980 openhook=fileinput.hook_encoded('utf-8'))) as f:
2981 # FileInput doesn't have a read method, we can't call json.load
2982 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2983 try:
2984 self.__download_wrapper(self.process_ie_result)(info, download=True)
2985 except (DownloadError, EntryNotInPlaylist, ThrottledDownload) as e:
2986 self.to_stderr('\r')
2987 webpage_url = info.get('webpage_url')
2988 if webpage_url is not None:
2989 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
2990 return self.download([webpage_url])
2991 else:
2992 raise
2993 return self._download_retcode
2994
2995 @staticmethod
2996 def sanitize_info(info_dict, remove_private_keys=False):
2997 ''' Sanitize the infodict for converting to json '''
2998 if info_dict is None:
2999 return info_dict
3000 info_dict.setdefault('epoch', int(time.time()))
3001 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
3002 keep_keys = ['_type'] # Always keep this to facilitate load-info-json
3003 if remove_private_keys:
3004 remove_keys |= {
3005 'requested_formats', 'requested_subtitles', 'requested_entries',
3006 'filepath', 'entries', 'original_url', 'playlist_autonumber',
3007 }
3008 empty_values = (None, {}, [], set(), tuple())
3009 reject = lambda k, v: k not in keep_keys and (
3010 k.startswith('_') or k in remove_keys or v in empty_values)
3011 else:
3012 reject = lambda k, v: k in remove_keys
3013 filter_fn = lambda obj: (
3014 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
3015 else obj if not isinstance(obj, dict)
3016 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
3017 return filter_fn(info_dict)
3018
3019 @staticmethod
3020 def filter_requested_info(info_dict, actually_filter=True):
3021 ''' Alias of sanitize_info for backward compatibility '''
3022 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3023
3024 def run_pp(self, pp, infodict):
3025 files_to_delete = []
3026 if '__files_to_move' not in infodict:
3027 infodict['__files_to_move'] = {}
3028 try:
3029 files_to_delete, infodict = pp.run(infodict)
3030 except PostProcessingError as e:
3031 # Must be True and not 'only_download'
3032 if self.params.get('ignoreerrors') is True:
3033 self.report_error(e)
3034 return infodict
3035 raise
3036
3037 if not files_to_delete:
3038 return infodict
3039 if self.params.get('keepvideo', False):
3040 for f in files_to_delete:
3041 infodict['__files_to_move'].setdefault(f, '')
3042 else:
3043 for old_filename in set(files_to_delete):
3044 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3045 try:
3046 os.remove(encodeFilename(old_filename))
3047 except (IOError, OSError):
3048 self.report_warning('Unable to remove downloaded original file')
3049 if old_filename in infodict['__files_to_move']:
3050 del infodict['__files_to_move'][old_filename]
3051 return infodict
3052
3053 @staticmethod
3054 def post_extract(info_dict):
3055 def actual_post_extract(info_dict):
3056 if info_dict.get('_type') in ('playlist', 'multi_video'):
3057 for video_dict in info_dict.get('entries', {}):
3058 actual_post_extract(video_dict or {})
3059 return
3060
3061 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3062 extra = post_extractor().items()
3063 info_dict.update(extra)
3064 info_dict.pop('__post_extractor', None)
3065
3066 original_infodict = info_dict.get('__original_infodict') or {}
3067 original_infodict.update(extra)
3068 original_infodict.pop('__post_extractor', None)
3069
3070 actual_post_extract(info_dict or {})
3071
3072 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3073 info = dict(ie_info)
3074 info['__files_to_move'] = files_to_move or {}
3075 for pp in self._pps[key]:
3076 info = self.run_pp(pp, info)
3077 return info, info.pop('__files_to_move', None)
3078
3079 def post_process(self, filename, ie_info, files_to_move=None):
3080 """Run all the postprocessors on the given file."""
3081 info = dict(ie_info)
3082 info['filepath'] = filename
3083 info['__files_to_move'] = files_to_move or {}
3084
3085 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3086 info = self.run_pp(pp, info)
3087 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3088 del info['__files_to_move']
3089 for pp in self._pps['after_move']:
3090 info = self.run_pp(pp, info)
3091 return info
3092
3093 def _make_archive_id(self, info_dict):
3094 video_id = info_dict.get('id')
3095 if not video_id:
3096 return
3097 # Future-proof against any change in case
3098 # and backwards compatibility with prior versions
3099 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3100 if extractor is None:
3101 url = str_or_none(info_dict.get('url'))
3102 if not url:
3103 return
3104 # Try to find matching extractor for the URL and take its ie_key
3105 for ie_key, ie in self._ies.items():
3106 if ie.suitable(url):
3107 extractor = ie_key
3108 break
3109 else:
3110 return
3111 return '%s %s' % (extractor.lower(), video_id)
3112
3113 def in_download_archive(self, info_dict):
3114 fn = self.params.get('download_archive')
3115 if fn is None:
3116 return False
3117
3118 vid_id = self._make_archive_id(info_dict)
3119 if not vid_id:
3120 return False # Incomplete video information
3121
3122 return vid_id in self.archive
3123
3124 def record_download_archive(self, info_dict):
3125 fn = self.params.get('download_archive')
3126 if fn is None:
3127 return
3128 vid_id = self._make_archive_id(info_dict)
3129 assert vid_id
3130 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3131 archive_file.write(vid_id + '\n')
3132 self.archive.add(vid_id)
3133
3134 @staticmethod
3135 def format_resolution(format, default='unknown'):
3136 is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none'
3137 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3138 return 'audio only'
3139 if format.get('resolution') is not None:
3140 return format['resolution']
3141 if format.get('width') and format.get('height'):
3142 res = '%dx%d' % (format['width'], format['height'])
3143 elif format.get('height'):
3144 res = '%sp' % format['height']
3145 elif format.get('width'):
3146 res = '%dx?' % format['width']
3147 elif is_images:
3148 return 'images'
3149 else:
3150 return default
3151 return f'{res} images' if is_images else res
3152
    def _format_note(self, fdict):
        """Return a short human-readable note describing a format dict.

        Used by the old-style (non-table) list_formats output. Fields are
        appended in a fixed order, comma-separated once *res* is non-empty.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                # '@' joins the codec with the bitrate appended below
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Bitrates known but codec unknown
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            # '~' marks an estimated size
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
3208
3209 def _list_format_headers(self, *headers):
3210 if self.params.get('listformats_table', True) is not False:
3211 return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3212 return headers
3213
    def list_formats(self, info_dict):
        """Print a table of the available formats for the video.

        Uses the new multi-column table unless 'listformats_table' is
        explicitly False, in which case the legacy 4-column layout is used.
        Formats with preference < -1000 are hidden in both layouts.
        """
        # A bare info_dict without 'formats' is treated as a single format
        formats = info_dict.get('formats', [info_dict])
        new_format = self.params.get('listformats_table', True) is not False
        if new_format:
            # Column widths for the bitrate fields are sized to the largest value
            tbr_digits = number_of_digits(max(f.get('tbr') or 0 for f in formats))
            vbr_digits = number_of_digits(max(f.get('vbr') or 0 for f in formats))
            abr_digits = number_of_digits(max(f.get('abr') or 0 for f in formats))
            # Vertical bar delimiter, with ASCII '|' fallback
            delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
            table = [
                [
                    self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    format_field(f, 'fps', '%3d'),
                    format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
                    delim,
                    format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                    format_field(f, 'tbr', f'%{tbr_digits}dk'),
                    shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                    delim,
                    format_field(f, 'vcodec', default='unknown').replace('none', ''),
                    format_field(f, 'vbr', f'%{vbr_digits}dk'),
                    format_field(f, 'acodec', default='unknown').replace('none', ''),
                    format_field(f, 'abr', f'%{abr_digits}dk'),
                    format_field(f, 'asr', '%5dHz'),
                    join_nonempty(
                        self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                        format_field(f, 'language', '[%s]'),
                        format_field(f, 'format_note'),
                        format_field(f, 'container', ignore=(None, f.get('ext'))),
                        delim=', '),
                ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
            header_line = self._list_format_headers(
                'ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', delim, ' FILESIZE', ' TBR', 'PROTO',
                delim, 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO')
        else:
            # Legacy layout: id / ext / resolution / free-form note
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)]
                for f in formats
                if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['format code', 'extension', 'resolution', 'note']

        self.to_screen(
            '[info] Available formats for %s:' % info_dict['id'])
        self.to_stdout(render_table(
            header_line, table,
            extraGap=(0 if new_format else 1),
            hideEmpty=new_format,
            delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3267
3268 def list_thumbnails(self, info_dict):
3269 thumbnails = list(info_dict.get('thumbnails'))
3270 if not thumbnails:
3271 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3272 return
3273
3274 self.to_screen(
3275 '[info] Thumbnails for %s:' % info_dict['id'])
3276 self.to_stdout(render_table(
3277 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3278 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3279
3280 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3281 if not subtitles:
3282 self.to_screen('%s has no %s' % (video_id, name))
3283 return
3284 self.to_screen(
3285 'Available %s for %s:' % (name, video_id))
3286
3287 def _row(lang, formats):
3288 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3289 if len(set(names)) == 1:
3290 names = [] if names[0] == 'unknown' else names[:1]
3291 return [lang, ', '.join(names), ', '.join(exts)]
3292
3293 self.to_stdout(render_table(
3294 self._list_format_headers('Language', 'Name', 'Formats'),
3295 [_row(lang, formats) for lang, formats in subtitles.items()],
3296 hideEmpty=True))
3297
3298 def urlopen(self, req):
3299 """ Start an HTTP download """
3300 if isinstance(req, compat_basestring):
3301 req = sanitized_Request(req)
3302 return self._opener.open(req, timeout=self._socket_timeout)
3303
3304 def print_debug_header(self):
3305 if not self.params.get('verbose'):
3306 return
3307
3308 def get_encoding(stream):
3309 ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3310 if not supports_terminal_sequences(stream):
3311 ret += ' (No ANSI)'
3312 return ret
3313
3314 encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3315 locale.getpreferredencoding(),
3316 sys.getfilesystemencoding(),
3317 get_encoding(self._screen_file), get_encoding(self._err_file),
3318 self.get_encoding())
3319
3320 logger = self.params.get('logger')
3321 if logger:
3322 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3323 write_debug(encoding_str)
3324 else:
3325 write_string(f'[debug] {encoding_str}\n', encoding=None)
3326 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3327
3328 source = detect_variant()
3329 write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
3330 if not _LAZY_LOADER:
3331 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3332 write_debug('Lazy loading extractors is forcibly disabled')
3333 else:
3334 write_debug('Lazy loading extractors is disabled')
3335 if plugin_extractors or plugin_postprocessors:
3336 write_debug('Plugins: %s' % [
3337 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3338 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3339 if self.params.get('compat_opts'):
3340 write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3341 try:
3342 sp = Popen(
3343 ['git', 'rev-parse', '--short', 'HEAD'],
3344 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3345 cwd=os.path.dirname(os.path.abspath(__file__)))
3346 out, err = sp.communicate_or_kill()
3347 out = out.decode().strip()
3348 if re.match('[0-9a-f]+', out):
3349 write_debug('Git HEAD: %s' % out)
3350 except Exception:
3351 try:
3352 sys.exc_clear()
3353 except Exception:
3354 pass
3355
3356 def python_implementation():
3357 impl_name = platform.python_implementation()
3358 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3359 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3360 return impl_name
3361
3362 write_debug('Python version %s (%s %s) - %s' % (
3363 platform.python_version(),
3364 python_implementation(),
3365 platform.architecture()[0],
3366 platform_name()))
3367
3368 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3369 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3370 if ffmpeg_features:
3371 exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3372
3373 exe_versions['rtmpdump'] = rtmpdump_version()
3374 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3375 exe_str = ', '.join(
3376 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3377 ) or 'none'
3378 write_debug('exe versions: %s' % exe_str)
3379
3380 from .downloader.websocket import has_websockets
3381 from .postprocessor.embedthumbnail import has_mutagen
3382 from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3383
3384 lib_str = join_nonempty(
3385 compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3386 KEYRING_AVAILABLE and 'keyring',
3387 has_mutagen and 'mutagen',
3388 SQLITE_AVAILABLE and 'sqlite',
3389 has_websockets and 'websockets',
3390 delim=', ') or 'none'
3391 write_debug('Optional libraries: %s' % lib_str)
3392
3393 proxy_map = {}
3394 for handler in self._opener.handlers:
3395 if hasattr(handler, 'proxies'):
3396 proxy_map.update(handler.proxies)
3397 write_debug(f'Proxy map: {proxy_map}')
3398
3399 # Not implemented
3400 if False and self.params.get('call_home'):
3401 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3402 write_debug('Public IP address: %s' % ipaddr)
3403 latest_version = self.urlopen(
3404 'https://yt-dl.org/latest/version').read().decode('utf-8')
3405 if version_tuple(latest_version) > version_tuple(__version__):
3406 self.report_warning(
3407 'You are using an outdated version (newest version: %s)! '
3408 'See https://yt-dl.org/update if you need help updating.' %
3409 latest_version)
3410
3411 def _setup_opener(self):
3412 timeout_val = self.params.get('socket_timeout')
3413 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3414
3415 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3416 opts_cookiefile = self.params.get('cookiefile')
3417 opts_proxy = self.params.get('proxy')
3418
3419 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3420
3421 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3422 if opts_proxy is not None:
3423 if opts_proxy == '':
3424 proxies = {}
3425 else:
3426 proxies = {'http': opts_proxy, 'https': opts_proxy}
3427 else:
3428 proxies = compat_urllib_request.getproxies()
3429 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3430 if 'http' in proxies and 'https' not in proxies:
3431 proxies['https'] = proxies['http']
3432 proxy_handler = PerRequestProxyHandler(proxies)
3433
3434 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3435 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3436 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3437 redirect_handler = YoutubeDLRedirectHandler()
3438 data_handler = compat_urllib_request_DataHandler()
3439
3440 # When passing our own FileHandler instance, build_opener won't add the
3441 # default FileHandler and allows us to disable the file protocol, which
3442 # can be used for malicious purposes (see
3443 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3444 file_handler = compat_urllib_request.FileHandler()
3445
3446 def file_open(*args, **kwargs):
3447 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3448 file_handler.file_open = file_open
3449
3450 opener = compat_urllib_request.build_opener(
3451 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3452
3453 # Delete the default user-agent header, which would otherwise apply in
3454 # cases where our custom HTTP handler doesn't come into play
3455 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3456 opener.addheaders = []
3457 self._opener = opener
3458
3459 def encode(self, s):
3460 if isinstance(s, bytes):
3461 return s # Already encoded
3462
3463 try:
3464 return s.encode(self.get_encoding())
3465 except UnicodeEncodeError as err:
3466 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3467 raise
3468
3469 def get_encoding(self):
3470 encoding = self.params.get('encoding')
3471 if encoding is None:
3472 encoding = preferredencoding()
3473 return encoding
3474
3475 def _write_info_json(self, label, ie_result, infofn):
3476 ''' Write infojson and returns True = written, False = skip, None = error '''
3477 if not self.params.get('writeinfojson'):
3478 return False
3479 elif not infofn:
3480 self.write_debug(f'Skipping writing {label} infojson')
3481 return False
3482 elif not self._ensure_dir_exists(infofn):
3483 return None
3484 elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3485 self.to_screen(f'[info] {label.title()} metadata is already present')
3486 else:
3487 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3488 try:
3489 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3490 except (OSError, IOError):
3491 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3492 return None
3493 return True
3494
3495 def _write_description(self, label, ie_result, descfn):
3496 ''' Write description and returns True = written, False = skip, None = error '''
3497 if not self.params.get('writedescription'):
3498 return False
3499 elif not descfn:
3500 self.write_debug(f'Skipping writing {label} description')
3501 return False
3502 elif not self._ensure_dir_exists(descfn):
3503 return None
3504 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3505 self.to_screen(f'[info] {label.title()} description is already present')
3506 elif ie_result.get('description') is None:
3507 self.report_warning(f'There\'s no {label} description to write')
3508 return False
3509 else:
3510 try:
3511 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3512 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3513 descfile.write(ie_result['description'])
3514 except (OSError, IOError):
3515 self.report_error(f'Cannot write {label} description file {descfn}')
3516 return None
3517 return True
3518
    def _write_subtitles(self, info_dict, filename):
        ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
        ret = []
        subtitles = info_dict.get('requested_subtitles')
        if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            return ret

        sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
        if not sub_filename_base:
            self.to_screen('[info] Skipping writing video subtitles')
            return ret
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            # 'filename' yields the working path, 'sub_filename_base' the final one
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
            if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
                # Existing file with overwrites disabled is treated as written
                self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
                continue

            self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
            if sub_info.get('data') is not None:
                # Subtitle content was delivered inline by the extractor
                try:
                    # Use newline='' to prevent conversion of newline characters
                    # See https://github.com/ytdl-org/youtube-dl/issues/10268
                    with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                        subfile.write(sub_info['data'])
                    sub_info['filepath'] = sub_filename
                    ret.append((sub_filename, sub_filename_final))
                    continue
                except (OSError, IOError):
                    # A failed local write aborts the whole operation (None),
                    # unlike a failed download below which only skips the track
                    self.report_error(f'Cannot write video subtitles file {sub_filename}')
                    return None

            try:
                # No inline data: download the subtitle, reusing the video's headers
                sub_copy = sub_info.copy()
                sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
                self.dl(sub_filename, sub_copy, subtitle=True)
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                # Download failure is non-fatal: warn and move to the next language
                self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
                continue
        return ret
3566
    def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
        ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails, ret = [], []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
        # Only disambiguate filenames by thumbnail id when several may be written
        multiple = write_all and len(thumbnails) > 1

        if thumb_filename_base is None:
            thumb_filename_base = filename
        if thumbnails and not thumb_filename_base:
            self.write_debug(f'Skipping writing {label} thumbnail')
            return ret

        # Iterate in reverse so the last-listed thumbnail is attempted first
        for t in thumbnails[::-1]:
            thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
            thumb_display_id = f'{label} thumbnail {t["id"]}'
            thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
            thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

            if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
                # Existing file with overwrites disabled counts as a success
                ret.append((thumb_filename, thumb_filename_final))
                t['filepath'] = thumb_filename
                self.to_screen('[info] %s is already present' % (
                    thumb_display_id if multiple else f'{label} thumbnail').capitalize())
            else:
                self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                try:
                    uf = self.urlopen(t['url'])
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append((thumb_filename, thumb_filename_final))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    # Download failure is non-fatal; try the next candidate
                    self.report_warning(f'Unable to download {thumb_display_id}: {err}')
            # Unless all thumbnails were requested, stop after the first success
            if ret and not write_all:
                break
        return ret