yt_dlp/YoutubeDL.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import functools
  13 import io
  14 import itertools
  15 import json
  16 import locale
  17 import operator
  18 import os
  19 import platform
  20 import re
  21 import shutil
  22 import subprocess
  23 import sys
  24 import tempfile
  25 import time
  26 import tokenize
  27 import traceback
  28 import random
  29 import unicodedata
  30
  31 from enum import Enum
  32 from string import ascii_letters
  33
  34 from .compat import (
  35     compat_basestring,
  36     compat_get_terminal_size,
  37     compat_kwargs,
  38     compat_numeric_types,
  39     compat_os_name,
  40     compat_pycrypto_AES,
  41     compat_shlex_quote,
  42     compat_str,
  43     compat_tokenize_tokenize,
  44     compat_urllib_error,
  45     compat_urllib_request,
  46     compat_urllib_request_DataHandler,
  47     windows_enable_vt_mode,
  48 )
  49 from .cookies import load_cookies
  50 from .utils import (
  51     age_restricted,
  52     args_to_str,
  53     ContentTooShortError,
  54     date_from_str,
  55     DateRange,
  56     DEFAULT_OUTTMPL,
  57     determine_ext,
  58     determine_protocol,
  59     DownloadCancelled,
  60     DownloadError,
  61     encode_compat_str,
  62     encodeFilename,
  63     EntryNotInPlaylist,
  64     error_to_compat_str,
  65     ExistingVideoReached,
  66     expand_path,
  67     ExtractorError,
  68     float_or_none,
  69     format_bytes,
  70     format_field,
  71     formatSeconds,
  72     GeoRestrictedError,
  73     HEADRequest,
  74     int_or_none,
  75     iri_to_uri,
  76     ISO3166Utils,
  77     join_nonempty,
  78     LazyList,
  79     LINK_TEMPLATES,
  80     locked_file,
  81     make_dir,
  82     make_HTTPS_handler,
  83     MaxDownloadsReached,
  84     network_exceptions,
  85     number_of_digits,
  86     orderedSet,
  87     OUTTMPL_TYPES,
  88     PagedList,
  89     parse_filesize,
  90     PerRequestProxyHandler,
  91     platform_name,
  92     Popen,
  93     PostProcessingError,
  94     preferredencoding,
  95     prepend_extension,
  96     register_socks_protocols,
  97     RejectedVideoReached,
  98     render_table,
  99     replace_extension,
 100     SameFileError,
 101     sanitize_filename,
 102     sanitize_path,
 103     sanitize_url,
 104     sanitized_Request,
 105     std_headers,
 106     STR_FORMAT_RE_TMPL,
 107     STR_FORMAT_TYPES,
 108     str_or_none,
 109     strftime_or_none,
 110     subtitles_filename,
 111     supports_terminal_sequences,
 112     ThrottledDownload,
 113     to_high_limit_path,
 114     traverse_obj,
 115     try_get,
 116     UnavailableVideoError,
 117     url_basename,
 118     variadic,
 119     version_tuple,
 120     write_json_file,
 121     write_string,
 122     YoutubeDLCookieProcessor,
 123     YoutubeDLHandler,
 124     YoutubeDLRedirectHandler,
 125 )
 126 from .cache import Cache
 127 from .minicurses import format_text
 128 from .extractor import (
 129     gen_extractor_classes,
 130     get_info_extractor,
 131     _LAZY_LOADER,
 132     _PLUGIN_CLASSES as plugin_extractors
 133 )
 134 from .extractor.openload import PhantomJSwrapper
 135 from .downloader import (
 136     FFmpegFD,
 137     get_suitable_downloader,
 138     shorten_protocol_name
 139 )
 140 from .downloader.rtmp import rtmpdump_version
 141 from .postprocessor import (
 142     get_postprocessor,
 143     EmbedThumbnailPP,
 144     FFmpegFixupDurationPP,
 145     FFmpegFixupM3u8PP,
 146     FFmpegFixupM4aPP,
 147     FFmpegFixupStretchedPP,
 148     FFmpegFixupTimestampPP,
 149     FFmpegMergerPP,
 150     FFmpegPostProcessor,
 151     MoveFilesAfterDownloadPP,
 152     _PLUGIN_CLASSES as plugin_postprocessors
 153 )
 154 from .update import detect_variant
 155 from .version import __version__
 156
 157 if compat_os_name == 'nt':
 158     import ctypes
 159
 160
 161 class YoutubeDL(object):
 162     """YoutubeDL class.
 163
 164     YoutubeDL objects are the ones responsible of downloading the
 165     actual video file and writing it to disk if the user has requested
 166     it, among some other tasks. In most cases there should be one per
 167     program. As, given a video URL, the downloader doesn't know how to
 168     extract all the needed information, task that InfoExtractors do, it
 169     has to pass the URL to one of them.
 170
 171     For this, YoutubeDL objects have a method that allows
 172     InfoExtractors to be registered in a given order. When it is passed
 173     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 174     finds that reports being able to handle it. The InfoExtractor extracts
 175     all the information about the video or videos the URL refers to, and
 176     YoutubeDL process the extracted information, possibly using a File
 177     Downloader to download the video.
 178
 179     YoutubeDL objects accept a lot of parameters. In order not to saturate
 180     the object constructor with arguments, it receives a dictionary of
 181     options instead. These options are available through the params
 182     attribute for the InfoExtractors to use. The YoutubeDL also
 183     registers itself as the downloader in charge for the InfoExtractors
 184     that are added to it, so this is a "mutual registration".
 185
 186     Available options:
 187
 188     username:          Username for authentication purposes.
 189     password:          Password for authentication purposes.
 190     videopassword:     Password for accessing a video.
 191     ap_mso:            Adobe Pass multiple-system operator identifier.
 192     ap_username:       Multiple-system operator account username.
 193     ap_password:       Multiple-system operator account password.
 194     usenetrc:          Use netrc for authentication instead.
 195     verbose:           Print additional info to stdout.
 196     quiet:             Do not print messages to stdout.
 197     no_warnings:       Do not print out anything for warnings.
 198     forceprint:        A list of templates to force print
 199     forceurl:          Force printing final URL. (Deprecated)
 200     forcetitle:        Force printing title. (Deprecated)
 201     forceid:           Force printing ID. (Deprecated)
 202     forcethumbnail:    Force printing thumbnail URL. (Deprecated)
 203     forcedescription:  Force printing description. (Deprecated)
 204     forcefilename:     Force printing final filename. (Deprecated)
 205     forceduration:     Force printing duration. (Deprecated)
 206     forcejson:         Force printing info_dict as JSON.
 207     dump_single_json:  Force printing the info_dict of the whole playlist
 208                        (or video) as a single JSON line.
 209     force_write_download_archive: Force writing download archive regardless
 210                        of 'skip_download' or 'simulate'.
 211     simulate:          Do not download the video files. If unset (or None),
 212                        simulate only if listsubtitles, listformats or list_thumbnails is used
 213     format:            Video format code. see "FORMAT SELECTION" for more details.
 214                        You can also pass a function. The function takes 'ctx' as
 215                        argument and returns the formats to download.
 216                        See "build_format_selector" for an implementation
 217     allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
 218     ignore_no_formats_error: Ignore "No video formats" error. Usefull for
 219                        extracting metadata even if the video is not actually
 220                        available for download (experimental)
 221     format_sort:       A list of fields by which to sort the video formats.
 222                        See "Sorting Formats" for more details.
 223     format_sort_force: Force the given format_sort. see "Sorting Formats"
 224                        for more details.
 225     allow_multiple_video_streams:   Allow multiple video streams to be merged
 226                        into a single file
 227     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
 228                        into a single file
 229     check_formats      Whether to test if the formats are downloadable.
 230                        Can be True (check all), False (check none),
 231                        'selected' (check selected formats),
 232                        or None (check only if requested by extractor)
 233     paths:             Dictionary of output paths. The allowed keys are 'home'
 234                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
 235     outtmpl:           Dictionary of templates for output names. Allowed keys
 236                        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 237                        For compatibility with youtube-dl, a single string can also be used
 238     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
 239     restrictfilenames: Do not allow "&" and spaces in file names
 240     trim_file_name:    Limit length of filename (extension excluded)
 241     windowsfilenames:  Force the filenames to be windows compatible
 242     ignoreerrors:      Do not stop on download/postprocessing errors.
 243                        Can be 'only_download' to ignore only download errors.
 244                        Default is 'only_download' for CLI, but False for API
 245     skip_playlist_after_errors: Number of allowed failures until the rest of
 246                        the playlist is skipped
 247     force_generic_extractor: Force downloader to use the generic extractor
 248     overwrites:        Overwrite all video and metadata files if True,
 249                        overwrite only non-video files if None
 250                        and don't overwrite any file if False
 251                        For compatibility with youtube-dl,
 252                        "nooverwrites" may also be used instead
 253     playliststart:     Playlist item to start at.
 254     playlistend:       Playlist item to end at.
 255     playlist_items:    Specific indices of playlist to download.
 256     playlistreverse:   Download playlist items in reverse order.
 257     playlistrandom:    Download playlist items in random order.
 258     matchtitle:        Download only matching titles.
 259     rejecttitle:       Reject downloads for matching titles.
 260     logger:            Log messages to a logging.Logger instance.
 261     logtostderr:       Log messages to stderr instead of stdout.
 262     consoletitle:       Display progress in console window's titlebar.
 263     writedescription:  Write the video description to a .description file
 264     writeinfojson:     Write the video description to a .info.json file
 265     clean_infojson:    Remove private fields from the infojson
 266     getcomments:       Extract video comments. This will not be written to disk
 267                        unless writeinfojson is also given
 268     writeannotations:  Write the video annotations to a .annotations.xml file
 269     writethumbnail:    Write the thumbnail image to a file
 270     allow_playlist_files: Whether to write playlists' description, infojson etc
 271                        also to disk when using the 'write*' options
 272     write_all_thumbnails:  Write all thumbnail formats to files
 273     writelink:         Write an internet shortcut file, depending on the
 274                        current platform (.url/.webloc/.desktop)
 275     writeurllink:      Write a Windows internet shortcut file (.url)
 276     writewebloclink:   Write a macOS internet shortcut file (.webloc)
 277     writedesktoplink:  Write a Linux internet shortcut file (.desktop)
 278     writesubtitles:    Write the video subtitles to a file
 279     writeautomaticsub: Write the automatically generated subtitles to a file
 280     allsubtitles:      Deprecated - Use subtitleslangs = ['all']
 281                        Downloads all the subtitles of the video
 282                        (requires writesubtitles or writeautomaticsub)
 283     listsubtitles:     Lists all available subtitles for the video
 284     subtitlesformat:   The format code for subtitles
 285     subtitleslangs:    List of languages of the subtitles to download (can be regex).
 286                        The list may contain "all" to refer to all the available
 287                        subtitles. The language can be prefixed with a "-" to
 288                        exclude it from the requested languages. Eg: ['all', '-live_chat']
 289     keepvideo:         Keep the video file after post-processing
 290     daterange:         A DateRange object, download only if the upload_date is in the range.
 291     skip_download:     Skip the actual download of the video file
 292     cachedir:          Location of the cache files in the filesystem.
 293                        False to disable filesystem cache.
 294     noplaylist:        Download single video instead of a playlist if in doubt.
 295     age_limit:         An integer representing the user's age in years.
 296                        Unsuitable videos for the given age are skipped.
 297     min_views:         An integer representing the minimum view count the video
 298                        must have in order to not be skipped.
 299                        Videos without view count information are always
 300                        downloaded. None for no limit.
 301     max_views:         An integer representing the maximum view count.
 302                        Videos that are more popular than that are not
 303                        downloaded.
 304                        Videos without view count information are always
 305                        downloaded. None for no limit.
 306     download_archive:  File name of a file where all downloads are recorded.
 307                        Videos already present in the file are not downloaded
 308                        again.
 309     break_on_existing: Stop the download process after attempting to download a
 310                        file that is in the archive.
 311     break_on_reject:   Stop the download process when encountering a video that
 312                        has been filtered out.
 313     cookiefile:        File name where cookies should be read from and dumped to
 314     cookiesfrombrowser: A tuple containing the name of the browser and the profile
 315                        name/path from where cookies are loaded.
 316                        Eg: ('chrome', ) or ('vivaldi', 'default')
 317     nocheckcertificate:Do not verify SSL certificates
 318     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 319                        At the moment, this is only supported by YouTube.
 320     proxy:             URL of the proxy server to use
 321     geo_verification_proxy:  URL of the proxy to use for IP address verification
 322                        on geo-restricted sites.
 323     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 324     bidi_workaround:   Work around buggy terminals without bidirectional text
 325                        support, using fridibi
 326     debug_printtraffic:Print out sent and received HTTP traffic
 327     include_ads:       Download ads as well
 328     default_search:    Prepend this string if an input url is not valid.
 329                        'auto' for elaborate guessing
 330     encoding:          Use this encoding instead of the system-specified.
 331     extract_flat:      Do not resolve URLs, return the immediate result.
 332                        Pass in 'in_playlist' to only show this behavior for
 333                        playlist items.
 334     postprocessors:    A list of dictionaries, each with an entry
 335                        * key:  The name of the postprocessor. See
 336                                yt_dlp/postprocessor/__init__.py for a list.
 337                        * when: When to run the postprocessor. Can be one of
 338                                pre_process|before_dl|post_process|after_move.
 339                                Assumed to be 'post_process' if not given
 340     post_hooks:        Deprecated - Register a custom postprocessor instead
 341                        A list of functions that get called as the final step
 342                        for each video file, after all postprocessors have been
 343                        called. The filename will be passed as the only argument.
 344     progress_hooks:    A list of functions that get called on download
 345                        progress, with a dictionary with the entries
 346                        * status: One of "downloading", "error", or "finished".
 347                                  Check this first and ignore unknown values.
 348                        * info_dict: The extracted info_dict
 349
 350                        If status is one of "downloading", or "finished", the
 351                        following properties may also be present:
 352                        * filename: The final filename (always present)
 353                        * tmpfilename: The filename we're currently writing to
 354                        * downloaded_bytes: Bytes on disk
 355                        * total_bytes: Size of the whole file, None if unknown
 356                        * total_bytes_estimate: Guess of the eventual file size,
 357                                                None if unavailable.
 358                        * elapsed: The number of seconds since download started.
 359                        * eta: The estimated time in seconds, None if unknown
 360                        * speed: The download speed in bytes/second, None if
 361                                 unknown
 362                        * fragment_index: The counter of the currently
 363                                          downloaded video fragment.
 364                        * fragment_count: The number of fragments (= individual
 365                                          files that will be merged)
 366
 367                        Progress hooks are guaranteed to be called at least once
 368                        (with status "finished") if the download is successful.
 369     postprocessor_hooks:  A list of functions that get called on postprocessing
 370                        progress, with a dictionary with the entries
 371                        * status: One of "started", "processing", or "finished".
 372                                  Check this first and ignore unknown values.
 373                        * postprocessor: Name of the postprocessor
 374                        * info_dict: The extracted info_dict
 375
 376                        Progress hooks are guaranteed to be called at least twice
 377                        (with status "started" and "finished") if the processing is successful.
 378     merge_output_format: Extension to use when merging formats.
 379     final_ext:         Expected final extension; used to detect when the file was
 380                        already downloaded and converted
 381     fixup:             Automatically correct known faults of the file.
 382                        One of:
 383                        - "never": do nothing
 384                        - "warn": only emit a warning
 385                        - "detect_or_warn": check whether we can do anything
 386                                            about it, warn otherwise (default)
 387     source_address:    Client-side IP address to bind to.
 388     call_home:         Boolean, true iff we are allowed to contact the
 389                        yt-dlp servers for debugging. (BROKEN)
 390     sleep_interval_requests: Number of seconds to sleep between requests
 391                        during extraction
 392     sleep_interval:    Number of seconds to sleep before each download when
 393                        used alone or a lower bound of a range for randomized
 394                        sleep before each download (minimum possible number
 395                        of seconds to sleep) when used along with
 396                        max_sleep_interval.
 397     max_sleep_interval:Upper bound of a range for randomized sleep before each
 398                        download (maximum possible number of seconds to sleep).
 399                        Must only be used along with sleep_interval.
 400                        Actual sleep time will be a random float from range
 401                        [sleep_interval; max_sleep_interval].
 402     sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
 403     listformats:       Print an overview of available video formats and exit.
 404     list_thumbnails:   Print a table of all thumbnails and exit.
 405     match_filter:      A function that gets called with the info_dict of
 406                        every video.
 407                        If it returns a message, the video is ignored.
 408                        If it returns None, the video is downloaded.
 409                        match_filter_func in utils.py is one example for this.
 410     no_color:          Do not emit color codes in output.
 411     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 412                        HTTP header
 413     geo_bypass_country:
 414                        Two-letter ISO 3166-2 country code that will be used for
 415                        explicit geographic restriction bypassing via faking
 416                        X-Forwarded-For HTTP header
 417     geo_bypass_ip_block:
 418                        IP range in CIDR notation that will be used similarly to
 419                        geo_bypass_country
 420
 421     The following options determine which downloader is picked:
 422     external_downloader: A dictionary of protocol keys and the executable of the
 423                        external downloader to use for it. The allowed protocols
 424                        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
 425                        Set the value to 'native' to use the native downloader
 426     hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
 427                        or {'m3u8': 'ffmpeg'} instead.
 428                        Use the native HLS downloader instead of ffmpeg/avconv
 429                        if True, otherwise use ffmpeg/avconv if False, otherwise
 430                        use downloader suggested by extractor if None.
 431     compat_opts:       Compatibility options. See "Differences in default behavior".
 432                        The following options do not work when used through the API:
 433                        filename, abort-on-error, multistreams, no-live-chat, format-sort
 434                        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
 435                        Refer __init__.py for their implementation
 436     progress_template: Dictionary of templates for progress outputs.
 437                        Allowed keys are 'download', 'postprocess',
 438                        'download-title' (console title) and 'postprocess-title'.
 439                        The template is mapped on a dictionary with keys 'progress' and 'info'
 440
 441     The following parameters are not used by YoutubeDL itself, they are used by
 442     the downloader (see yt_dlp/downloader/common.py):
 443     nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
 444     max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
 445     noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
 446     external_downloader_args, concurrent_fragment_downloads.
 447
 448     The following options are used by the post processors:
 449     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
 450                        otherwise prefer ffmpeg. (avconv support is deprecated)
 451     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
 452                        to the binary or its containing directory.
 453     postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
 454                        and a list of additional command-line arguments for the
 455                        postprocessor/executable. The dict can also have "PP+EXE" keys
 456                        which are used when the given exe is used by the given PP.
 457                        Use 'default' as the name for arguments to passed to all PP
 458                        For compatibility with youtube-dl, a single list of args
 459                        can also be used
 460
 461     The following options are used by the extractors:
 462     extractor_retries: Number of times to retry for known errors
 463     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
 464     hls_split_discontinuity: Split HLS playlists to different formats at
 465                        discontinuities such as ad breaks (default: False)
 466     extractor_args:    A dictionary of arguments to be passed to the extractors.
 467                        See "EXTRACTOR ARGUMENTS" for details.
 468                        Eg: {'youtube': {'skip': ['dash', 'hls']}}
 469     youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
 470                        If True (default), DASH manifests and related
 471                        data will be downloaded and processed by extractor.
 472                        You can reduce network I/O by disabling it if you don't
 473                        care about DASH. (only for youtube)
 474     youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
 475                        If True (default), HLS manifests and related
 476                        data will be downloaded and processed by extractor.
 477                        You can reduce network I/O by disabling it if you don't
 478                        care about HLS. (only for youtube)
 479     """
 480
 481     _NUMERIC_FIELDS = set((
 482         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
 483         'timestamp', 'release_timestamp',
 484         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
 485         'average_rating', 'comment_count', 'age_limit',
 486         'start_time', 'end_time',
 487         'chapter_number', 'season_number', 'episode_number',
 488         'track_number', 'disc_number', 'release_year',
 489     ))
 490
 491     _format_selection_exts = {
 492         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
 493         'video': {'mp4', 'flv', 'webm', '3gp'},
 494         'storyboards': {'mhtml'},
 495     }
 496
 497     params = None
 498     _ies = {}
 499     _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 500     _printed_messages = set()
 501     _first_webpage_request = True
 502     _download_retcode = None
 503     _num_downloads = None
 504     _playlist_level = 0
 505     _playlist_urls = set()
 506     _screen_file = None
 507
 508     def __init__(self, params=None, auto_init=True):
 509         """Create a FileDownloader object with the given options.
 510         @param auto_init    Whether to load the default extractors and print header (if verbose).
 511                             Set to 'no_verbose_header' to not print the header
 512         """
 513         if params is None:
 514             params = {}
 515         self._ies = {}
 516         self._ies_instances = {}
 517         self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
 518         self._printed_messages = set()
 519         self._first_webpage_request = True
 520         self._post_hooks = []
 521         self._progress_hooks = []
 522         self._postprocessor_hooks = []
 523         self._download_retcode = 0
 524         self._num_downloads = 0
 525         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 526         self._err_file = sys.stderr
 527         self.params = params
 528         self.cache = Cache(self)
 529
 530         windows_enable_vt_mode()
 531         # FIXME: This will break if we ever print color to stdout
 532         self._allow_colors = {
 533             'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
 534             'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
 535         }
 536
 537         if sys.version_info < (3, 6):
 538             self.report_warning(
 539                 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 540
 541         if self.params.get('allow_unplayable_formats'):
 542             self.report_warning(
 543                 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
 544                 'This is a developer option intended for debugging. \n'
 545                 '         If you experience any issues while using this option, '
 546                 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
 547
 548         def check_deprecated(param, option, suggestion):
 549             if self.params.get(param) is not None:
 550                 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
 551                 return True
 552             return False
 553
 554         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
 555             if self.params.get('geo_verification_proxy') is None:
 556                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 557
 558         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
 559         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
 560         check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
 561
 562         for msg in self.params.get('_warnings', []):
 563             self.report_warning(msg)
 564
 565         if 'list-formats' in self.params.get('compat_opts', []):
 566             self.params['listformats_table'] = False
 567
 568         if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
 569             # nooverwrites was unnecessarily changed to overwrites
 570             # in 0c3d0f51778b153f65c21906031c2e091fcfb641
 571             # This ensures compatibility with both keys
 572             self.params['overwrites'] = not self.params['nooverwrites']
 573         elif self.params.get('overwrites') is None:
 574             self.params.pop('overwrites', None)
 575         else:
 576             self.params['nooverwrites'] = not self.params['overwrites']
 577
 578         if params.get('bidi_workaround', False):
 579             try:
 580                 import pty
 581                 master, slave = pty.openpty()
 582                 width = compat_get_terminal_size().columns
 583                 if width is None:
 584                     width_args = []
 585                 else:
 586                     width_args = ['-w', str(width)]
 587                 sp_kwargs = dict(
 588                     stdin=subprocess.PIPE,
 589                     stdout=slave,
 590                     stderr=self._err_file)
 591                 try:
 592                     self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
 593                 except OSError:
 594                     self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 595                 self._output_channel = os.fdopen(master, 'rb')
 596             except OSError as ose:
 597                 if ose.errno == errno.ENOENT:
 598                     self.report_warning(
 599                         'Could not find fribidi executable, ignoring --bidi-workaround. '
 600                         'Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 601                 else:
 602                     raise
 603
 604         if (sys.platform != 'win32'
 605                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 606                 and not params.get('restrictfilenames', False)):
 607             # Unicode filesystem API will throw errors (#1474, #13027)
 608             self.report_warning(
 609                 'Assuming --restrict-filenames since file system encoding '
 610                 'cannot encode all characters. '
 611                 'Set the LC_ALL environment variable to fix this.')
 612             self.params['restrictfilenames'] = True
 613
 614         self.outtmpl_dict = self.parse_outtmpl()
 615
 616         # Creating format selector here allows us to catch syntax errors before the extraction
 617         self.format_selector = (
 618             None if self.params.get('format') is None
 619             else self.params['format'] if callable(self.params['format'])
 620             else self.build_format_selector(self.params['format']))
 621
 622         self._setup_opener()
 623
 624         if auto_init:
 625             if auto_init != 'no_verbose_header':
 626                 self.print_debug_header()
 627             self.add_default_info_extractors()
 628
 629         for pp_def_raw in self.params.get('postprocessors', []):
 630             pp_def = dict(pp_def_raw)
 631             when = pp_def.pop('when', 'post_process')
 632             pp_class = get_postprocessor(pp_def.pop('key'))
 633             pp = pp_class(self, **compat_kwargs(pp_def))
 634             self.add_post_processor(pp, when=when)
 635
 636         hooks = {
 637             'post_hooks': self.add_post_hook,
 638             'progress_hooks': self.add_progress_hook,
 639             'postprocessor_hooks': self.add_postprocessor_hook,
 640         }
 641         for opt, fn in hooks.items():
 642             for ph in self.params.get(opt, []):
 643                 fn(ph)
 644
 645         register_socks_protocols()
 646
 647         def preload_download_archive(fn):
 648             """Preload the archive, if any is specified"""
 649             if fn is None:
 650                 return False
 651             self.write_debug(f'Loading archive file {fn!r}')
 652             try:
 653                 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 654                     for line in archive_file:
 655                         self.archive.add(line.strip())
 656             except IOError as ioe:
 657                 if ioe.errno != errno.ENOENT:
 658                     raise
 659                 return False
 660             return True
 661
 662         self.archive = set()
 663         preload_download_archive(self.params.get('download_archive'))
 664
 665     def warn_if_short_id(self, argv):
 666         # short YouTube ID starting with dash?
 667         idxs = [
 668             i for i, a in enumerate(argv)
 669             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 670         if idxs:
 671             correct_argv = (
 672                 ['yt-dlp']
 673                 + [a for i, a in enumerate(argv) if i not in idxs]
 674                 + ['--'] + [argv[i] for i in idxs]
 675             )
 676             self.report_warning(
 677                 'Long argument string detected. '
 678                 'Use -- to separate parameters and URLs, like this:\n%s' %
 679                 args_to_str(correct_argv))
 680
 681     def add_info_extractor(self, ie):
 682         """Add an InfoExtractor object to the end of the list."""
 683         ie_key = ie.ie_key()
 684         self._ies[ie_key] = ie
 685         if not isinstance(ie, type):
 686             self._ies_instances[ie_key] = ie
 687             ie.set_downloader(self)
 688
 689     def _get_info_extractor_class(self, ie_key):
 690         ie = self._ies.get(ie_key)
 691         if ie is None:
 692             ie = get_info_extractor(ie_key)
 693             self.add_info_extractor(ie)
 694         return ie
 695
 696     def get_info_extractor(self, ie_key):
 697         """
 698         Get an instance of an IE with name ie_key, it will try to get one from
 699         the _ies list, if there's no instance it will create a new one and add
 700         it to the extractor list.
 701         """
 702         ie = self._ies_instances.get(ie_key)
 703         if ie is None:
 704             ie = get_info_extractor(ie_key)()
 705             self.add_info_extractor(ie)
 706         return ie
 707
 708     def add_default_info_extractors(self):
 709         """
 710         Add the InfoExtractors returned by gen_extractors to the end of the list
 711         """
 712         for ie in gen_extractor_classes():
 713             self.add_info_extractor(ie)
 714
 715     def add_post_processor(self, pp, when='post_process'):
 716         """Add a PostProcessor object to the end of the chain."""
 717         self._pps[when].append(pp)
 718         pp.set_downloader(self)
 719
 720     def add_post_hook(self, ph):
 721         """Add the post hook"""
 722         self._post_hooks.append(ph)
 723
 724     def add_progress_hook(self, ph):
 725         """Add the download progress hook"""
 726         self._progress_hooks.append(ph)
 727
 728     def add_postprocessor_hook(self, ph):
 729         """Add the postprocessing progress hook"""
 730         self._postprocessor_hooks.append(ph)
 731
 732     def _bidi_workaround(self, message):
 733         if not hasattr(self, '_output_channel'):
 734             return message
 735
 736         assert hasattr(self, '_output_process')
 737         assert isinstance(message, compat_str)
 738         line_count = message.count('\n') + 1
 739         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 740         self._output_process.stdin.flush()
 741         res = ''.join(self._output_channel.readline().decode('utf-8')
 742                       for _ in range(line_count))
 743         return res[:-len('\n')]
 744
 745     def _write_string(self, message, out=None, only_once=False):
 746         if only_once:
 747             if message in self._printed_messages:
 748                 return
 749             self._printed_messages.add(message)
 750         write_string(message, out=out, encoding=self.params.get('encoding'))
 751
 752     def to_stdout(self, message, skip_eol=False, quiet=False):
 753         """Print message to stdout"""
 754         if self.params.get('logger'):
 755             self.params['logger'].debug(message)
 756         elif not quiet or self.params.get('verbose'):
 757             self._write_string(
 758                 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
 759                 self._err_file if quiet else self._screen_file)
 760
 761     def to_stderr(self, message, only_once=False):
 762         """Print message to stderr"""
 763         assert isinstance(message, compat_str)
 764         if self.params.get('logger'):
 765             self.params['logger'].error(message)
 766         else:
 767             self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
 768
 769     def to_console_title(self, message):
 770         if not self.params.get('consoletitle', False):
 771             return
 772         if compat_os_name == 'nt':
 773             if ctypes.windll.kernel32.GetConsoleWindow():
 774                 # c_wchar_p() might not be necessary if `message` is
 775                 # already of type unicode()
 776                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 777         elif 'TERM' in os.environ:
 778             self._write_string('\033]0;%s\007' % message, self._screen_file)
 779
 780     def save_console_title(self):
 781         if not self.params.get('consoletitle', False):
 782             return
 783         if self.params.get('simulate'):
 784             return
 785         if compat_os_name != 'nt' and 'TERM' in os.environ:
 786             # Save the title on stack
 787             self._write_string('\033[22;0t', self._screen_file)
 788
 789     def restore_console_title(self):
 790         if not self.params.get('consoletitle', False):
 791             return
 792         if self.params.get('simulate'):
 793             return
 794         if compat_os_name != 'nt' and 'TERM' in os.environ:
 795             # Restore the title from stack
 796             self._write_string('\033[23;0t', self._screen_file)
 797
 798     def __enter__(self):
 799         self.save_console_title()
 800         return self
 801
 802     def __exit__(self, *args):
 803         self.restore_console_title()
 804
 805         if self.params.get('cookiefile') is not None:
 806             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 807
 808     def trouble(self, message=None, tb=None):
 809         """Determine action to take when a download problem appears.
 810
 811         Depending on if the downloader has been configured to ignore
 812         download errors or not, this method may throw an exception or
 813         not when errors are found, after printing the message.
 814
 815         tb, if given, is additional traceback information.
 816         """
 817         if message is not None:
 818             self.to_stderr(message)
 819         if self.params.get('verbose'):
 820             if tb is None:
 821                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 822                     tb = ''
 823                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 824                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 825                     tb += encode_compat_str(traceback.format_exc())
 826                 else:
 827                     tb_data = traceback.format_list(traceback.extract_stack())
 828                     tb = ''.join(tb_data)
 829             if tb:
 830                 self.to_stderr(tb)
 831         if not self.params.get('ignoreerrors'):
 832             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 833                 exc_info = sys.exc_info()[1].exc_info
 834             else:
 835                 exc_info = sys.exc_info()
 836             raise DownloadError(message, exc_info)
 837         self._download_retcode = 1
 838
 839     def to_screen(self, message, skip_eol=False):
 840         """Print message to stdout if not in quiet mode"""
 841         self.to_stdout(
 842             message, skip_eol, quiet=self.params.get('quiet', False))
 843
 844     class Styles(Enum):
 845         HEADERS = 'yellow'
 846         EMPHASIS = 'blue'
 847         ID = 'green'
 848         DELIM = 'blue'
 849         ERROR = 'red'
 850         WARNING = 'yellow'
 851
 852     def __format_text(self, out, text, f, fallback=None, *, test_encoding=False):
 853         assert out in ('screen', 'err')
 854         if test_encoding:
 855             original_text = text
 856             handle = self._screen_file if out == 'screen' else self._err_file
 857             encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
 858             text = text.encode(encoding, 'ignore').decode(encoding)
 859             if fallback is not None and text != original_text:
 860                 text = fallback
 861         if isinstance(f, self.Styles):
 862             f = f._value_
 863         return format_text(text, f) if self._allow_colors[out] else text if fallback is None else fallback
 864
 865     def _format_screen(self, *args, **kwargs):
 866         return self.__format_text('screen', *args, **kwargs)
 867
 868     def _format_err(self, *args, **kwargs):
 869         return self.__format_text('err', *args, **kwargs)
 870
 871     def report_warning(self, message, only_once=False):
 872         '''
 873         Print the message to stderr, it will be prefixed with 'WARNING:'
 874         If stderr is a tty file the 'WARNING:' will be colored
 875         '''
 876         if self.params.get('logger') is not None:
 877             self.params['logger'].warning(message)
 878         else:
 879             if self.params.get('no_warnings'):
 880                 return
 881             self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
 882
 883     def report_error(self, message, tb=None):
 884         '''
 885         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 886         in red if stderr is a tty file.
 887         '''
 888         self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb)
 889
 890     def write_debug(self, message, only_once=False):
 891         '''Log debug message or Print message to stderr'''
 892         if not self.params.get('verbose', False):
 893             return
 894         message = '[debug] %s' % message
 895         if self.params.get('logger'):
 896             self.params['logger'].debug(message)
 897         else:
 898             self.to_stderr(message, only_once)
 899
 900     def report_file_already_downloaded(self, file_name):
 901         """Report file has already been fully downloaded."""
 902         try:
 903             self.to_screen('[download] %s has already been downloaded' % file_name)
 904         except UnicodeEncodeError:
 905             self.to_screen('[download] The file has already been downloaded')
 906
 907     def report_file_delete(self, file_name):
 908         """Report that existing file will be deleted."""
 909         try:
 910             self.to_screen('Deleting existing file %s' % file_name)
 911         except UnicodeEncodeError:
 912             self.to_screen('Deleting existing file')
 913
 914     def raise_no_formats(self, info, forced=False):
 915         has_drm = info.get('__has_drm')
 916         msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
 917         expected = self.params.get('ignore_no_formats_error')
 918         if forced or not expected:
 919             raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
 920                                  expected=has_drm or expected)
 921         else:
 922             self.report_warning(msg)
 923
 924     def parse_outtmpl(self):
 925         outtmpl_dict = self.params.get('outtmpl', {})
 926         if not isinstance(outtmpl_dict, dict):
 927             outtmpl_dict = {'default': outtmpl_dict}
 928         # Remove spaces in the default template
 929         if self.params.get('restrictfilenames'):
 930             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 931         else:
 932             sanitize = lambda x: x
 933         outtmpl_dict.update({
 934             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
 935             if outtmpl_dict.get(k) is None})
 936         for key, val in outtmpl_dict.items():
 937             if isinstance(val, bytes):
 938                 self.report_warning(
 939                     'Parameter outtmpl is bytes, but should be a unicode string. '
 940                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 941         return outtmpl_dict
 942
 943     def get_output_path(self, dir_type='', filename=None):
 944         paths = self.params.get('paths', {})
 945         assert isinstance(paths, dict)
 946         path = os.path.join(
 947             expand_path(paths.get('home', '').strip()),
 948             expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
 949             filename or '')
 950
 951         # Temporary fix for #4787
 952         # 'Treat' all problem characters by passing filename through preferredencoding
 953         # to workaround encoding issues with subprocess on python2 @ Windows
 954         if sys.version_info < (3, 0) and sys.platform == 'win32':
 955             path = encodeFilename(path, True).decode(preferredencoding())
 956         return sanitize_path(path, force=self.params.get('windowsfilenames'))
 957
 958     @staticmethod
 959     def _outtmpl_expandpath(outtmpl):
 960         # expand_path translates '%%' into '%' and '$$' into '$'
 961         # correspondingly that is not what we want since we need to keep
 962         # '%%' intact for template dict substitution step. Working around
 963         # with boundary-alike separator hack.
 964         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
 965         outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
 966
 967         # outtmpl should be expand_path'ed before template dict substitution
 968         # because meta fields may contain env variables we don't want to
 969         # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
 970         # title "Hello $PATH", we don't want `$PATH` to be expanded.
 971         return expand_path(outtmpl).replace(sep, '')
 972
 973     @staticmethod
 974     def escape_outtmpl(outtmpl):
 975         ''' Escape any remaining strings like %s, %abc% etc. '''
 976         return re.sub(
 977             STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
 978             lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
 979             outtmpl)
 980
 981     @classmethod
 982     def validate_outtmpl(cls, outtmpl):
 983         ''' @return None or Exception object '''
 984         outtmpl = re.sub(
 985             STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
 986             lambda mobj: f'{mobj.group(0)[:-1]}s',
 987             cls._outtmpl_expandpath(outtmpl))
 988         try:
 989             cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
 990             return None
 991         except ValueError as err:
 992             return err
 993
 994     @staticmethod
 995     def _copy_infodict(info_dict):
 996         info_dict = dict(info_dict)
 997         for key in ('__original_infodict', '__postprocessors'):
 998             info_dict.pop(key, None)
 999         return info_dict
1000
1001     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
1002         """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
1003         info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
1004
1005         info_dict = self._copy_infodict(info_dict)
1006         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1007             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1008             if info_dict.get('duration', None) is not None
1009             else None)
1010         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
1011         if info_dict.get('resolution') is None:
1012             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1013
1014         # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1015         # of %(field)s to %(field)0Nd for backward compatibility
1016         field_size_compat_map = {
1017             'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1018             'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1019             'autonumber': self.params.get('autonumber_size') or 5,
1020         }
1021
1022         TMPL_DICT = {}
1023         EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
1024         MATH_FUNCTIONS = {
1025             '+': float.__add__,
1026             '-': float.__sub__,
1027         }
1028         # Field is of the form key1.key2...
1029         # where keys (except first) can be string, int or slice
1030         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
1031         MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
1032         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1033         INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1034             (?P<negate>-)?
1035             (?P<fields>{field})
1036             (?P<maths>(?:{math_op}{math_field})*)
1037             (?:>(?P<strf_format>.+?))?
1038             (?P<alternate>(?<!\\),[^|)]+)?
1039             (?:\|(?P<default>.*?))?
1040             $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
1041
1042         def _traverse_infodict(k):
1043             k = k.split('.')
1044             if k[0] == '':
1045                 k.pop(0)
1046             return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
1047
1048         def get_value(mdict):
1049             # Object traversal
1050             value = _traverse_infodict(mdict['fields'])
1051             # Negative
1052             if mdict['negate']:
1053                 value = float_or_none(value)
1054                 if value is not None:
1055                     value *= -1
1056             # Do maths
1057             offset_key = mdict['maths']
1058             if offset_key:
1059                 value = float_or_none(value)
1060                 operator = None
1061                 while offset_key:
1062                     item = re.match(
1063                         MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1064                         offset_key).group(0)
1065                     offset_key = offset_key[len(item):]
1066                     if operator is None:
1067                         operator = MATH_FUNCTIONS[item]
1068                         continue
1069                     item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1070                     offset = float_or_none(item)
1071                     if offset is None:
1072                         offset = float_or_none(_traverse_infodict(item))
1073                     try:
1074                         value = operator(value, multiplier * offset)
1075                     except (TypeError, ZeroDivisionError):
1076                         return None
1077                     operator = None
1078             # Datetime formatting
1079             if mdict['strf_format']:
1080                 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1081
1082             return value
1083
1084         na = self.params.get('outtmpl_na_placeholder', 'NA')
1085
1086         def _dumpjson_default(obj):
1087             if isinstance(obj, (set, LazyList)):
1088                 return list(obj)
1089             raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
1090
1091         def create_key(outer_mobj):
1092             if not outer_mobj.group('has_key'):
1093                 return outer_mobj.group(0)
1094             key = outer_mobj.group('key')
1095             mobj = re.match(INTERNAL_FORMAT_RE, key)
1096             initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
1097             value, default = None, na
1098             while mobj:
1099                 mobj = mobj.groupdict()
1100                 default = mobj['default'] if mobj['default'] is not None else default
1101                 value = get_value(mobj)
1102                 if value is None and mobj['alternate']:
1103                     mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
1104                 else:
1105                     break
1106
1107             fmt = outer_mobj.group('format')
1108             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1109                 fmt = '0{:d}d'.format(field_size_compat_map[key])
1110
1111             value = default if value is None else value
1112
1113             flags = outer_mobj.group('conversion') or ''
1114             str_fmt = f'{fmt[:-1]}s'
1115             if fmt[-1] == 'l':  # list
1116                 delim = '\n' if '#' in flags else ', '
1117                 value, fmt = delim.join(variadic(value)), str_fmt
1118             elif fmt[-1] == 'j':  # json
1119                 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
1120             elif fmt[-1] == 'q':  # quoted
1121                 value = map(str, variadic(value) if '#' in flags else [value])
1122                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1123             elif fmt[-1] == 'B':  # bytes
1124                 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1125                 value, fmt = value.decode('utf-8', 'ignore'), 's'
1126             elif fmt[-1] == 'U':  # unicode normalized
1127                 value, fmt = unicodedata.normalize(
1128                     # "+" = compatibility equivalence, "#" = NFD
1129                     'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1130                     value), str_fmt
1131             elif fmt[-1] == 'c':
1132                 if value:
1133                     value = str(value)[0]
1134                 else:
1135                     fmt = str_fmt
1136             elif fmt[-1] not in 'rs':  # numeric
1137                 value = float_or_none(value)
1138                 if value is None:
1139                     value, fmt = default, 's'
1140
1141             if sanitize:
1142                 if fmt[-1] == 'r':
1143                     # If value is an object, sanitize might convert it to a string
1144                     # So we convert it to repr first
1145                     value, fmt = repr(value), str_fmt
1146                 if fmt[-1] in 'csr':
1147                     value = sanitize(initial_field, value)
1148
1149             key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1150             TMPL_DICT[key] = value
1151             return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1152
1153         return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1154
1155     def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1156         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1157         return self.escape_outtmpl(outtmpl) % info_dict
1158
1159     def _prepare_filename(self, info_dict, tmpl_type='default'):
1160         try:
1161             sanitize = lambda k, v: sanitize_filename(
1162                 compat_str(v),
1163                 restricted=self.params.get('restrictfilenames'),
1164                 is_id=(k == 'id' or k.endswith('_id')))
1165             outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1166             filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
1167
1168             force_ext = OUTTMPL_TYPES.get(tmpl_type)
1169             if filename and force_ext is not None:
1170                 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1171
1172             # https://github.com/blackjack4494/youtube-dlc/issues/85
1173             trim_file_name = self.params.get('trim_file_name', False)
1174             if trim_file_name:
1175                 fn_groups = filename.rsplit('.')
1176                 ext = fn_groups[-1]
1177                 sub_ext = ''
1178                 if len(fn_groups) > 2:
1179                     sub_ext = fn_groups[-2]
1180                 filename = join_nonempty(fn_groups[0][:trim_file_name], sub_ext, ext, delim='.')
1181
1182             return filename
1183         except ValueError as err:
1184             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1185             return None
1186
1187     def prepare_filename(self, info_dict, dir_type='', warn=False):
1188         """Generate the output filename."""
1189
1190         filename = self._prepare_filename(info_dict, dir_type or 'default')
1191         if not filename and dir_type not in ('', 'temp'):
1192             return ''
1193
1194         if warn:
1195             if not self.params.get('paths'):
1196                 pass
1197             elif filename == '-':
1198                 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1199             elif os.path.isabs(filename):
1200                 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1201         if filename == '-' or not filename:
1202             return filename
1203
1204         return self.get_output_path(dir_type, filename)
1205
1206     def _match_entry(self, info_dict, incomplete=False, silent=False):
1207         """ Returns None if the file should be downloaded """
1208
1209         video_title = info_dict.get('title', info_dict.get('id', 'video'))
1210
1211         def check_filter():
1212             if 'title' in info_dict:
1213                 # This can happen when we're just evaluating the playlist
1214                 title = info_dict['title']
1215                 matchtitle = self.params.get('matchtitle', False)
1216                 if matchtitle:
1217                     if not re.search(matchtitle, title, re.IGNORECASE):
1218                         return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1219                 rejecttitle = self.params.get('rejecttitle', False)
1220                 if rejecttitle:
1221                     if re.search(rejecttitle, title, re.IGNORECASE):
1222                         return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1223             date = info_dict.get('upload_date')
1224             if date is not None:
1225                 dateRange = self.params.get('daterange', DateRange())
1226                 if date not in dateRange:
1227                     return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1228             view_count = info_dict.get('view_count')
1229             if view_count is not None:
1230                 min_views = self.params.get('min_views')
1231                 if min_views is not None and view_count < min_views:
1232                     return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1233                 max_views = self.params.get('max_views')
1234                 if max_views is not None and view_count > max_views:
1235                     return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1236             if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1237                 return 'Skipping "%s" because it is age restricted' % video_title
1238
1239             match_filter = self.params.get('match_filter')
1240             if match_filter is not None:
1241                 try:
1242                     ret = match_filter(info_dict, incomplete=incomplete)
1243                 except TypeError:
1244                     # For backward compatibility
1245                     ret = None if incomplete else match_filter(info_dict)
1246                 if ret is not None:
1247                     return ret
1248             return None
1249
1250         if self.in_download_archive(info_dict):
1251             reason = '%s has already been recorded in the archive' % video_title
1252             break_opt, break_err = 'break_on_existing', ExistingVideoReached
1253         else:
1254             reason = check_filter()
1255             break_opt, break_err = 'break_on_reject', RejectedVideoReached
1256         if reason is not None:
1257             if not silent:
1258                 self.to_screen('[download] ' + reason)
1259             if self.params.get(break_opt, False):
1260                 raise break_err()
1261         return reason
1262
1263     @staticmethod
1264     def add_extra_info(info_dict, extra_info):
1265         '''Set the keys from extra_info in info dict if they are missing'''
1266         for key, value in extra_info.items():
1267             info_dict.setdefault(key, value)
1268
1269     def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1270                      process=True, force_generic_extractor=False):
1271         """
1272         Return a list with a dictionary for each video extracted.
1273
1274         Arguments:
1275         url -- URL to extract
1276
1277         Keyword arguments:
1278         download -- whether to download videos during extraction
1279         ie_key -- extractor key hint
1280         extra_info -- dictionary containing the extra values to add to each result
1281         process -- whether to resolve all unresolved references (URLs, playlist items),
1282             must be True for download to work.
1283         force_generic_extractor -- force using the generic extractor
1284         """
1285
1286         if extra_info is None:
1287             extra_info = {}
1288
1289         if not ie_key and force_generic_extractor:
1290             ie_key = 'Generic'
1291
1292         if ie_key:
1293             ies = {ie_key: self._get_info_extractor_class(ie_key)}
1294         else:
1295             ies = self._ies
1296
1297         for ie_key, ie in ies.items():
1298             if not ie.suitable(url):
1299                 continue
1300
1301             if not ie.working():
1302                 self.report_warning('The program functionality for this site has been marked as broken, '
1303                                     'and will probably not work.')
1304
1305             temp_id = ie.get_temp_id(url)
1306             if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1307                 self.to_screen("[%s] %s: has already been recorded in archive" % (
1308                                ie_key, temp_id))
1309                 break
1310             return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1311         else:
1312             self.report_error('no suitable InfoExtractor for URL %s' % url)
1313
1314     def __handle_extraction_exceptions(func):
1315         @functools.wraps(func)
1316         def wrapper(self, *args, **kwargs):
1317             try:
1318                 return func(self, *args, **kwargs)
1319             except GeoRestrictedError as e:
1320                 msg = e.msg
1321                 if e.countries:
1322                     msg += '\nThis video is available in %s.' % ', '.join(
1323                         map(ISO3166Utils.short2full, e.countries))
1324                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1325                 self.report_error(msg)
1326             except ExtractorError as e:  # An error we somewhat expected
1327                 self.report_error(compat_str(e), e.format_traceback())
1328             except ThrottledDownload as e:
1329                 self.to_stderr('\r')
1330                 self.report_warning(f'{e}; Re-extracting data')
1331                 return wrapper(self, *args, **kwargs)
1332             except (DownloadCancelled, LazyList.IndexError):
1333                 raise
1334             except Exception as e:
1335                 if self.params.get('ignoreerrors'):
1336                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
1337                 else:
1338                     raise
1339         return wrapper
1340
1341     @__handle_extraction_exceptions
1342     def __extract_info(self, url, ie, download, extra_info, process):
1343         ie_result = ie.extract(url)
1344         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
1345             return
1346         if isinstance(ie_result, list):
1347             # Backwards compatibility: old IE result format
1348             ie_result = {
1349                 '_type': 'compat_list',
1350                 'entries': ie_result,
1351             }
1352         if extra_info.get('original_url'):
1353             ie_result.setdefault('original_url', extra_info['original_url'])
1354         self.add_default_extra_info(ie_result, ie, url)
1355         if process:
1356             return self.process_ie_result(ie_result, download, extra_info)
1357         else:
1358             return ie_result
1359
1360     def add_default_extra_info(self, ie_result, ie, url):
1361         if url is not None:
1362             self.add_extra_info(ie_result, {
1363                 'webpage_url': url,
1364                 'original_url': url,
1365                 'webpage_url_basename': url_basename(url),
1366             })
1367         if ie is not None:
1368             self.add_extra_info(ie_result, {
1369                 'extractor': ie.IE_NAME,
1370                 'extractor_key': ie.ie_key(),
1371             })
1372
1373     def process_ie_result(self, ie_result, download=True, extra_info=None):
1374         """
1375         Take the result of the ie(may be modified) and resolve all unresolved
1376         references (URLs, playlist items).
1377
1378         It will also download the videos if 'download'.
1379         Returns the resolved ie_result.
1380         """
1381         if extra_info is None:
1382             extra_info = {}
1383         result_type = ie_result.get('_type', 'video')
1384
1385         if result_type in ('url', 'url_transparent'):
1386             ie_result['url'] = sanitize_url(ie_result['url'])
1387             if ie_result.get('original_url'):
1388                 extra_info.setdefault('original_url', ie_result['original_url'])
1389
1390             extract_flat = self.params.get('extract_flat', False)
1391             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1392                     or extract_flat is True):
1393                 info_copy = ie_result.copy()
1394                 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1395                 if ie and not ie_result.get('id'):
1396                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
1397                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1398                 self.add_extra_info(info_copy, extra_info)
1399                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1400                 if self.params.get('force_write_download_archive', False):
1401                     self.record_download_archive(info_copy)
1402                 return ie_result
1403
1404         if result_type == 'video':
1405             self.add_extra_info(ie_result, extra_info)
1406             ie_result = self.process_video_result(ie_result, download=download)
1407             additional_urls = (ie_result or {}).get('additional_urls')
1408             if additional_urls:
1409                 # TODO: Improve MetadataParserPP to allow setting a list
1410                 if isinstance(additional_urls, compat_str):
1411                     additional_urls = [additional_urls]
1412                 self.to_screen(
1413                     '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1414                 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1415                 ie_result['additional_entries'] = [
1416                     self.extract_info(
1417                         url, download, extra_info,
1418                         force_generic_extractor=self.params.get('force_generic_extractor'))
1419                     for url in additional_urls
1420                 ]
1421             return ie_result
1422         elif result_type == 'url':
1423             # We have to add extra_info to the results because it may be
1424             # contained in a playlist
1425             return self.extract_info(
1426                 ie_result['url'], download,
1427                 ie_key=ie_result.get('ie_key'),
1428                 extra_info=extra_info)
1429         elif result_type == 'url_transparent':
1430             # Use the information from the embedding page
1431             info = self.extract_info(
1432                 ie_result['url'], ie_key=ie_result.get('ie_key'),
1433                 extra_info=extra_info, download=False, process=False)
1434
1435             # extract_info may return None when ignoreerrors is enabled and
1436             # extraction failed with an error, don't crash and return early
1437             # in this case
1438             if not info:
1439                 return info
1440
1441             force_properties = dict(
1442                 (k, v) for k, v in ie_result.items() if v is not None)
1443             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1444                 if f in force_properties:
1445                     del force_properties[f]
1446             new_result = info.copy()
1447             new_result.update(force_properties)
1448
1449             # Extracted info may not be a video result (i.e.
1450             # info.get('_type', 'video') != video) but rather an url or
1451             # url_transparent. In such cases outer metadata (from ie_result)
1452             # should be propagated to inner one (info). For this to happen
1453             # _type of info should be overridden with url_transparent. This
1454             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1455             if new_result.get('_type') == 'url':
1456                 new_result['_type'] = 'url_transparent'
1457
1458             return self.process_ie_result(
1459                 new_result, download=download, extra_info=extra_info)
1460         elif result_type in ('playlist', 'multi_video'):
1461             # Protect from infinite recursion due to recursively nested playlists
1462             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1463             webpage_url = ie_result['webpage_url']
1464             if webpage_url in self._playlist_urls:
1465                 self.to_screen(
1466                     '[download] Skipping already downloaded playlist: %s'
1467                     % ie_result.get('title') or ie_result.get('id'))
1468                 return
1469
1470             self._playlist_level += 1
1471             self._playlist_urls.add(webpage_url)
1472             self._sanitize_thumbnails(ie_result)
1473             try:
1474                 return self.__process_playlist(ie_result, download)
1475             finally:
1476                 self._playlist_level -= 1
1477                 if not self._playlist_level:
1478                     self._playlist_urls.clear()
1479         elif result_type == 'compat_list':
1480             self.report_warning(
1481                 'Extractor %s returned a compat_list result. '
1482                 'It needs to be updated.' % ie_result.get('extractor'))
1483
1484             def _fixup(r):
1485                 self.add_extra_info(r, {
1486                     'extractor': ie_result['extractor'],
1487                     'webpage_url': ie_result['webpage_url'],
1488                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
1489                     'extractor_key': ie_result['extractor_key'],
1490                 })
1491                 return r
1492             ie_result['entries'] = [
1493                 self.process_ie_result(_fixup(r), download, extra_info)
1494                 for r in ie_result['entries']
1495             ]
1496             return ie_result
1497         else:
1498             raise Exception('Invalid result type: %s' % result_type)
1499
1500     def _ensure_dir_exists(self, path):
1501         return make_dir(path, self.report_error)
1502
1503     def __process_playlist(self, ie_result, download):
1504         # We process each entry in the playlist
1505         playlist = ie_result.get('title') or ie_result.get('id')
1506         self.to_screen('[download] Downloading playlist: %s' % playlist)
1507
1508         if 'entries' not in ie_result:
1509             raise EntryNotInPlaylist('There are no entries')
1510
1511         MissingEntry = object()
1512         incomplete_entries = bool(ie_result.get('requested_entries'))
1513         if incomplete_entries:
1514             def fill_missing_entries(entries, indices):
1515                 ret = [MissingEntry] * max(indices)
1516                 for i, entry in zip(indices, entries):
1517                     ret[i - 1] = entry
1518                 return ret
1519             ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1520
1521         playlist_results = []
1522
1523         playliststart = self.params.get('playliststart', 1)
1524         playlistend = self.params.get('playlistend')
1525         # For backwards compatibility, interpret -1 as whole list
1526         if playlistend == -1:
1527             playlistend = None
1528
1529         playlistitems_str = self.params.get('playlist_items')
1530         playlistitems = None
1531         if playlistitems_str is not None:
1532             def iter_playlistitems(format):
1533                 for string_segment in format.split(','):
1534                     if '-' in string_segment:
1535                         start, end = string_segment.split('-')
1536                         for item in range(int(start), int(end) + 1):
1537                             yield int(item)
1538                     else:
1539                         yield int(string_segment)
1540             playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1541
1542         ie_entries = ie_result['entries']
1543         msg = (
1544             'Downloading %d videos' if not isinstance(ie_entries, list)
1545             else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1546
1547         if isinstance(ie_entries, list):
1548             def get_entry(i):
1549                 return ie_entries[i - 1]
1550         else:
1551             if not isinstance(ie_entries, (PagedList, LazyList)):
1552                 ie_entries = LazyList(ie_entries)
1553
1554             def get_entry(i):
1555                 return YoutubeDL.__handle_extraction_exceptions(
1556                     lambda self, i: ie_entries[i - 1]
1557                 )(self, i)
1558
1559         entries = []
1560         items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1561         for i in items:
1562             if i == 0:
1563                 continue
1564             if playlistitems is None and playlistend is not None and playlistend < i:
1565                 break
1566             entry = None
1567             try:
1568                 entry = get_entry(i)
1569                 if entry is MissingEntry:
1570                     raise EntryNotInPlaylist()
1571             except (IndexError, EntryNotInPlaylist):
1572                 if incomplete_entries:
1573                     raise EntryNotInPlaylist(f'Entry {i} cannot be found')
1574                 elif not playlistitems:
1575                     break
1576             entries.append(entry)
1577             try:
1578                 if entry is not None:
1579                     self._match_entry(entry, incomplete=True, silent=True)
1580             except (ExistingVideoReached, RejectedVideoReached):
1581                 break
1582         ie_result['entries'] = entries
1583
1584         # Save playlist_index before re-ordering
1585         entries = [
1586             ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1587             for i, entry in enumerate(entries, 1)
1588             if entry is not None]
1589         n_entries = len(entries)
1590
1591         if not playlistitems and (playliststart != 1 or playlistend):
1592             playlistitems = list(range(playliststart, playliststart + n_entries))
1593         ie_result['requested_entries'] = playlistitems
1594
1595         _infojson_written = False
1596         if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
1597             ie_copy = {
1598                 'playlist': playlist,
1599                 'playlist_id': ie_result.get('id'),
1600                 'playlist_title': ie_result.get('title'),
1601                 'playlist_uploader': ie_result.get('uploader'),
1602                 'playlist_uploader_id': ie_result.get('uploader_id'),
1603                 'playlist_index': 0,
1604                 'n_entries': n_entries,
1605             }
1606             ie_copy.update(dict(ie_result))
1607
1608             _infojson_written = self._write_info_json(
1609                 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1610             if _infojson_written is None:
1611                 return
1612             if self._write_description('playlist', ie_result,
1613                                        self.prepare_filename(ie_copy, 'pl_description')) is None:
1614                 return
1615             # TODO: This should be passed to ThumbnailsConvertor if necessary
1616             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1617
1618         if self.params.get('playlistreverse', False):
1619             entries = entries[::-1]
1620         if self.params.get('playlistrandom', False):
1621             random.shuffle(entries)
1622
1623         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1624
1625         self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1626         failures = 0
1627         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1628         for i, entry_tuple in enumerate(entries, 1):
1629             playlist_index, entry = entry_tuple
1630             if 'playlist-index' in self.params.get('compat_opts', []):
1631                 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1632             self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1633             # This __x_forwarded_for_ip thing is a bit ugly but requires
1634             # minimal changes
1635             if x_forwarded_for:
1636                 entry['__x_forwarded_for_ip'] = x_forwarded_for
1637             extra = {
1638                 'n_entries': n_entries,
1639                 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1640                 'playlist_index': playlist_index,
1641                 'playlist_autonumber': i,
1642                 'playlist': playlist,
1643                 'playlist_id': ie_result.get('id'),
1644                 'playlist_title': ie_result.get('title'),
1645                 'playlist_uploader': ie_result.get('uploader'),
1646                 'playlist_uploader_id': ie_result.get('uploader_id'),
1647                 'extractor': ie_result['extractor'],
1648                 'webpage_url': ie_result['webpage_url'],
1649                 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1650                 'extractor_key': ie_result['extractor_key'],
1651             }
1652
1653             if self._match_entry(entry, incomplete=True) is not None:
1654                 continue
1655
1656             entry_result = self.__process_iterable_entry(entry, download, extra)
1657             if not entry_result:
1658                 failures += 1
1659             if failures >= max_failures:
1660                 self.report_error(
1661                     'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1662                 break
1663             playlist_results.append(entry_result)
1664         ie_result['entries'] = playlist_results
1665
1666         # Write the updated info to json
1667         if _infojson_written and self._write_info_json(
1668                 'updated playlist', ie_result,
1669                 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1670             return
1671         self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1672         return ie_result
1673
1674     @__handle_extraction_exceptions
1675     def __process_iterable_entry(self, entry, download, extra_info):
1676         return self.process_ie_result(
1677             entry, download=download, extra_info=extra_info)
1678
1679     def _build_format_filter(self, filter_spec):
1680         " Returns a function to filter the formats according to the filter_spec "
1681
1682         OPERATORS = {
1683             '<': operator.lt,
1684             '<=': operator.le,
1685             '>': operator.gt,
1686             '>=': operator.ge,
1687             '=': operator.eq,
1688             '!=': operator.ne,
1689         }
1690         operator_rex = re.compile(r'''(?x)\s*
1691             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1692             (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1693             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1694             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1695         m = operator_rex.fullmatch(filter_spec)
1696         if m:
1697             try:
1698                 comparison_value = int(m.group('value'))
1699             except ValueError:
1700                 comparison_value = parse_filesize(m.group('value'))
1701                 if comparison_value is None:
1702                     comparison_value = parse_filesize(m.group('value') + 'B')
1703                 if comparison_value is None:
1704                     raise ValueError(
1705                         'Invalid value %r in format specification %r' % (
1706                             m.group('value'), filter_spec))
1707             op = OPERATORS[m.group('op')]
1708
1709         if not m:
1710             STR_OPERATORS = {
1711                 '=': operator.eq,
1712                 '^=': lambda attr, value: attr.startswith(value),
1713                 '$=': lambda attr, value: attr.endswith(value),
1714                 '*=': lambda attr, value: value in attr,
1715             }
1716             str_operator_rex = re.compile(r'''(?x)\s*
1717                 (?P<key>[a-zA-Z0-9._-]+)\s*
1718                 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1719                 (?P<value>[a-zA-Z0-9._-]+)\s*
1720                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1721             m = str_operator_rex.fullmatch(filter_spec)
1722             if m:
1723                 comparison_value = m.group('value')
1724                 str_op = STR_OPERATORS[m.group('op')]
1725                 if m.group('negation'):
1726                     op = lambda attr, value: not str_op(attr, value)
1727                 else:
1728                     op = str_op
1729
1730         if not m:
1731             raise SyntaxError('Invalid filter specification %r' % filter_spec)
1732
1733         def _filter(f):
1734             actual_value = f.get(m.group('key'))
1735             if actual_value is None:
1736                 return m.group('none_inclusive')
1737             return op(actual_value, comparison_value)
1738         return _filter
1739
1740     def _check_formats(self, formats):
1741         for f in formats:
1742             self.to_screen('[info] Testing format %s' % f['format_id'])
1743             temp_file = tempfile.NamedTemporaryFile(
1744                 suffix='.tmp', delete=False,
1745                 dir=self.get_output_path('temp') or None)
1746             temp_file.close()
1747             try:
1748                 success, _ = self.dl(temp_file.name, f, test=True)
1749             except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1750                 success = False
1751             finally:
1752                 if os.path.exists(temp_file.name):
1753                     try:
1754                         os.remove(temp_file.name)
1755                     except OSError:
1756                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1757             if success:
1758                 yield f
1759             else:
1760                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1761
1762     def _default_format_spec(self, info_dict, download=True):
1763
1764         def can_merge():
1765             merger = FFmpegMergerPP(self)
1766             return merger.available and merger.can_merge()
1767
1768         prefer_best = (
1769             not self.params.get('simulate')
1770             and download
1771             and (
1772                 not can_merge()
1773                 or info_dict.get('is_live', False)
1774                 or self.outtmpl_dict['default'] == '-'))
1775         compat = (
1776             prefer_best
1777             or self.params.get('allow_multiple_audio_streams', False)
1778             or 'format-spec' in self.params.get('compat_opts', []))
1779
1780         return (
1781             'best/bestvideo+bestaudio' if prefer_best
1782             else 'bestvideo*+bestaudio/best' if not compat
1783             else 'bestvideo+bestaudio/best')
1784
1785     def build_format_selector(self, format_spec):
1786         def syntax_error(note, start):
1787             message = (
1788                 'Invalid format specification: '
1789                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1790             return SyntaxError(message)
1791
1792         PICKFIRST = 'PICKFIRST'
1793         MERGE = 'MERGE'
1794         SINGLE = 'SINGLE'
1795         GROUP = 'GROUP'
1796         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1797
1798         allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1799                                   'video': self.params.get('allow_multiple_video_streams', False)}
1800
1801         check_formats = self.params.get('check_formats') == 'selected'
1802
1803         def _parse_filter(tokens):
1804             filter_parts = []
1805             for type, string, start, _, _ in tokens:
1806                 if type == tokenize.OP and string == ']':
1807                     return ''.join(filter_parts)
1808                 else:
1809                     filter_parts.append(string)
1810
1811         def _remove_unused_ops(tokens):
1812             # Remove operators that we don't use and join them with the surrounding strings
1813             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1814             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1815             last_string, last_start, last_end, last_line = None, None, None, None
1816             for type, string, start, end, line in tokens:
1817                 if type == tokenize.OP and string == '[':
1818                     if last_string:
1819                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1820                         last_string = None
1821                     yield type, string, start, end, line
1822                     # everything inside brackets will be handled by _parse_filter
1823                     for type, string, start, end, line in tokens:
1824                         yield type, string, start, end, line
1825                         if type == tokenize.OP and string == ']':
1826                             break
1827                 elif type == tokenize.OP and string in ALLOWED_OPS:
1828                     if last_string:
1829                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1830                         last_string = None
1831                     yield type, string, start, end, line
1832                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1833                     if not last_string:
1834                         last_string = string
1835                         last_start = start
1836                         last_end = end
1837                     else:
1838                         last_string += string
1839             if last_string:
1840                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1841
1842         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1843             selectors = []
1844             current_selector = None
1845             for type, string, start, _, _ in tokens:
1846                 # ENCODING is only defined in python 3.x
1847                 if type == getattr(tokenize, 'ENCODING', None):
1848                     continue
1849                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1850                     current_selector = FormatSelector(SINGLE, string, [])
1851                 elif type == tokenize.OP:
1852                     if string == ')':
1853                         if not inside_group:
1854                             # ')' will be handled by the parentheses group
1855                             tokens.restore_last_token()
1856                         break
1857                     elif inside_merge and string in ['/', ',']:
1858                         tokens.restore_last_token()
1859                         break
1860                     elif inside_choice and string == ',':
1861                         tokens.restore_last_token()
1862                         break
1863                     elif string == ',':
1864                         if not current_selector:
1865                             raise syntax_error('"," must follow a format selector', start)
1866                         selectors.append(current_selector)
1867                         current_selector = None
1868                     elif string == '/':
1869                         if not current_selector:
1870                             raise syntax_error('"/" must follow a format selector', start)
1871                         first_choice = current_selector
1872                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1873                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1874                     elif string == '[':
1875                         if not current_selector:
1876                             current_selector = FormatSelector(SINGLE, 'best', [])
1877                         format_filter = _parse_filter(tokens)
1878                         current_selector.filters.append(format_filter)
1879                     elif string == '(':
1880                         if current_selector:
1881                             raise syntax_error('Unexpected "("', start)
1882                         group = _parse_format_selection(tokens, inside_group=True)
1883                         current_selector = FormatSelector(GROUP, group, [])
1884                     elif string == '+':
1885                         if not current_selector:
1886                             raise syntax_error('Unexpected "+"', start)
1887                         selector_1 = current_selector
1888                         selector_2 = _parse_format_selection(tokens, inside_merge=True)
1889                         if not selector_2:
1890                             raise syntax_error('Expected a selector', start)
1891                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
1892                     else:
1893                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1894                 elif type == tokenize.ENDMARKER:
1895                     break
1896             if current_selector:
1897                 selectors.append(current_selector)
1898             return selectors
1899
1900         def _merge(formats_pair):
1901             format_1, format_2 = formats_pair
1902
1903             formats_info = []
1904             formats_info.extend(format_1.get('requested_formats', (format_1,)))
1905             formats_info.extend(format_2.get('requested_formats', (format_2,)))
1906
1907             if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1908                 get_no_more = {'video': False, 'audio': False}
1909                 for (i, fmt_info) in enumerate(formats_info):
1910                     if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1911                         formats_info.pop(i)
1912                         continue
1913                     for aud_vid in ['audio', 'video']:
1914                         if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1915                             if get_no_more[aud_vid]:
1916                                 formats_info.pop(i)
1917                                 break
1918                             get_no_more[aud_vid] = True
1919
1920             if len(formats_info) == 1:
1921                 return formats_info[0]
1922
1923             video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1924             audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1925
1926             the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1927             the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1928
1929             output_ext = self.params.get('merge_output_format')
1930             if not output_ext:
1931                 if the_only_video:
1932                     output_ext = the_only_video['ext']
1933                 elif the_only_audio and not video_fmts:
1934                     output_ext = the_only_audio['ext']
1935                 else:
1936                     output_ext = 'mkv'
1937
1938             filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
1939
1940             new_dict = {
1941                 'requested_formats': formats_info,
1942                 'format': '+'.join(filtered('format')),
1943                 'format_id': '+'.join(filtered('format_id')),
1944                 'ext': output_ext,
1945                 'protocol': '+'.join(map(determine_protocol, formats_info)),
1946                 'language': '+'.join(orderedSet(filtered('language'))) or None,
1947                 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
1948                 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
1949                 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
1950             }
1951
1952             if the_only_video:
1953                 new_dict.update({
1954                     'width': the_only_video.get('width'),
1955                     'height': the_only_video.get('height'),
1956                     'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1957                     'fps': the_only_video.get('fps'),
1958                     'dynamic_range': the_only_video.get('dynamic_range'),
1959                     'vcodec': the_only_video.get('vcodec'),
1960                     'vbr': the_only_video.get('vbr'),
1961                     'stretched_ratio': the_only_video.get('stretched_ratio'),
1962                 })
1963
1964             if the_only_audio:
1965                 new_dict.update({
1966                     'acodec': the_only_audio.get('acodec'),
1967                     'abr': the_only_audio.get('abr'),
1968                     'asr': the_only_audio.get('asr'),
1969                 })
1970
1971             return new_dict
1972
1973         def _check_formats(formats):
1974             if not check_formats:
1975                 yield from formats
1976                 return
1977             yield from self._check_formats(formats)
1978
1979         def _build_selector_function(selector):
1980             if isinstance(selector, list):  # ,
1981                 fs = [_build_selector_function(s) for s in selector]
1982
1983                 def selector_function(ctx):
1984                     for f in fs:
1985                         yield from f(ctx)
1986                 return selector_function
1987
1988             elif selector.type == GROUP:  # ()
1989                 selector_function = _build_selector_function(selector.selector)
1990
1991             elif selector.type == PICKFIRST:  # /
1992                 fs = [_build_selector_function(s) for s in selector.selector]
1993
1994                 def selector_function(ctx):
1995                     for f in fs:
1996                         picked_formats = list(f(ctx))
1997                         if picked_formats:
1998                             return picked_formats
1999                     return []
2000
2001             elif selector.type == MERGE:  # +
2002                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2003
2004                 def selector_function(ctx):
2005                     for pair in itertools.product(
2006                             selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
2007                         yield _merge(pair)
2008
2009             elif selector.type == SINGLE:  # atom
2010                 format_spec = selector.selector or 'best'
2011
2012                 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2013                 if format_spec == 'all':
2014                     def selector_function(ctx):
2015                         yield from _check_formats(ctx['formats'])
2016                 elif format_spec == 'mergeall':
2017                     def selector_function(ctx):
2018                         formats = list(_check_formats(ctx['formats']))
2019                         if not formats:
2020                             return
2021                         merged_format = formats[-1]
2022                         for f in formats[-2::-1]:
2023                             merged_format = _merge((merged_format, f))
2024                         yield merged_format
2025
2026                 else:
2027                     format_fallback, format_reverse, format_idx = False, True, 1
2028                     mobj = re.match(
2029                         r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2030                         format_spec)
2031                     if mobj is not None:
2032                         format_idx = int_or_none(mobj.group('n'), default=1)
2033                         format_reverse = mobj.group('bw')[0] == 'b'
2034                         format_type = (mobj.group('type') or [None])[0]
2035                         not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2036                         format_modified = mobj.group('mod') is not None
2037
2038                         format_fallback = not format_type and not format_modified  # for b, w
2039                         _filter_f = (
2040                             (lambda f: f.get('%scodec' % format_type) != 'none')
2041                             if format_type and format_modified  # bv*, ba*, wv*, wa*
2042                             else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2043                             if format_type  # bv, ba, wv, wa
2044                             else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2045                             if not format_modified  # b, w
2046                             else lambda f: True)  # b*, w*
2047                         filter_f = lambda f: _filter_f(f) and (
2048                             f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2049                     else:
2050                         if format_spec in self._format_selection_exts['audio']:
2051                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2052                         elif format_spec in self._format_selection_exts['video']:
2053                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2054                         elif format_spec in self._format_selection_exts['storyboards']:
2055                             filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2056                         else:
2057                             filter_f = lambda f: f.get('format_id') == format_spec  # id
2058
2059                     def selector_function(ctx):
2060                         formats = list(ctx['formats'])
2061                         matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2062                         if format_fallback and ctx['incomplete_formats'] and not matches:
2063                             # for extractors with incomplete formats (audio only (soundcloud)
2064                             # or video only (imgur)) best/worst will fallback to
2065                             # best/worst {video,audio}-only format
2066                             matches = formats
2067                         matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2068                         try:
2069                             yield matches[format_idx - 1]
2070                         except IndexError:
2071                             return
2072
2073             filters = [self._build_format_filter(f) for f in selector.filters]
2074
2075             def final_selector(ctx):
2076                 ctx_copy = copy.deepcopy(ctx)
2077                 for _filter in filters:
2078                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2079                 return selector_function(ctx_copy)
2080             return final_selector
2081
2082         stream = io.BytesIO(format_spec.encode('utf-8'))
2083         try:
2084             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
2085         except tokenize.TokenError:
2086             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2087
2088         class TokenIterator(object):
2089             def __init__(self, tokens):
2090                 self.tokens = tokens
2091                 self.counter = 0
2092
2093             def __iter__(self):
2094                 return self
2095
2096             def __next__(self):
2097                 if self.counter >= len(self.tokens):
2098                     raise StopIteration()
2099                 value = self.tokens[self.counter]
2100                 self.counter += 1
2101                 return value
2102
2103             next = __next__
2104
2105             def restore_last_token(self):
2106                 self.counter -= 1
2107
2108         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2109         return _build_selector_function(parsed_selector)
2110
2111     def _calc_headers(self, info_dict):
2112         res = std_headers.copy()
2113
2114         add_headers = info_dict.get('http_headers')
2115         if add_headers:
2116             res.update(add_headers)
2117
2118         cookies = self._calc_cookies(info_dict)
2119         if cookies:
2120             res['Cookie'] = cookies
2121
2122         if 'X-Forwarded-For' not in res:
2123             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2124             if x_forwarded_for_ip:
2125                 res['X-Forwarded-For'] = x_forwarded_for_ip
2126
2127         return res
2128
2129     def _calc_cookies(self, info_dict):
2130         pr = sanitized_Request(info_dict['url'])
2131         self.cookiejar.add_cookie_header(pr)
2132         return pr.get_header('Cookie')
2133
2134     def _sort_thumbnails(self, thumbnails):
2135         thumbnails.sort(key=lambda t: (
2136             t.get('preference') if t.get('preference') is not None else -1,
2137             t.get('width') if t.get('width') is not None else -1,
2138             t.get('height') if t.get('height') is not None else -1,
2139             t.get('id') if t.get('id') is not None else '',
2140             t.get('url')))
2141
2142     def _sanitize_thumbnails(self, info_dict):
2143         thumbnails = info_dict.get('thumbnails')
2144         if thumbnails is None:
2145             thumbnail = info_dict.get('thumbnail')
2146             if thumbnail:
2147                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2148         if not thumbnails:
2149             return
2150
2151         def check_thumbnails(thumbnails):
2152             for t in thumbnails:
2153                 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2154                 try:
2155                     self.urlopen(HEADRequest(t['url']))
2156                 except network_exceptions as err:
2157                     self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2158                     continue
2159                 yield t
2160
2161         self._sort_thumbnails(thumbnails)
2162         for i, t in enumerate(thumbnails):
2163             if t.get('id') is None:
2164                 t['id'] = '%d' % i
2165             if t.get('width') and t.get('height'):
2166                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2167             t['url'] = sanitize_url(t['url'])
2168
2169         if self.params.get('check_formats') is True:
2170             info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse()
2171         else:
2172             info_dict['thumbnails'] = thumbnails
2173
2174     def process_video_result(self, info_dict, download=True):
2175         assert info_dict.get('_type', 'video') == 'video'
2176
2177         if 'id' not in info_dict:
2178             raise ExtractorError('Missing "id" field in extractor result')
2179         if 'title' not in info_dict:
2180             raise ExtractorError('Missing "title" field in extractor result',
2181                                  video_id=info_dict['id'], ie=info_dict['extractor'])
2182
2183         def report_force_conversion(field, field_not, conversion):
2184             self.report_warning(
2185                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2186                 % (field, field_not, conversion))
2187
2188         def sanitize_string_field(info, string_field):
2189             field = info.get(string_field)
2190             if field is None or isinstance(field, compat_str):
2191                 return
2192             report_force_conversion(string_field, 'a string', 'string')
2193             info[string_field] = compat_str(field)
2194
2195         def sanitize_numeric_fields(info):
2196             for numeric_field in self._NUMERIC_FIELDS:
2197                 field = info.get(numeric_field)
2198                 if field is None or isinstance(field, compat_numeric_types):
2199                     continue
2200                 report_force_conversion(numeric_field, 'numeric', 'int')
2201                 info[numeric_field] = int_or_none(field)
2202
2203         sanitize_string_field(info_dict, 'id')
2204         sanitize_numeric_fields(info_dict)
2205
2206         if 'playlist' not in info_dict:
2207             # It isn't part of a playlist
2208             info_dict['playlist'] = None
2209             info_dict['playlist_index'] = None
2210
2211         self._sanitize_thumbnails(info_dict)
2212
2213         thumbnail = info_dict.get('thumbnail')
2214         thumbnails = info_dict.get('thumbnails')
2215         if thumbnail:
2216             info_dict['thumbnail'] = sanitize_url(thumbnail)
2217         elif thumbnails:
2218             info_dict['thumbnail'] = thumbnails[-1]['url']
2219
2220         if info_dict.get('display_id') is None and 'id' in info_dict:
2221             info_dict['display_id'] = info_dict['id']
2222
2223         if info_dict.get('duration') is not None:
2224             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2225
2226         for ts_key, date_key in (
2227                 ('timestamp', 'upload_date'),
2228                 ('release_timestamp', 'release_date'),
2229         ):
2230             if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2231                 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2232                 # see http://bugs.python.org/issue1646728)
2233                 try:
2234                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2235                     info_dict[date_key] = upload_date.strftime('%Y%m%d')
2236                 except (ValueError, OverflowError, OSError):
2237                     pass
2238
2239         live_keys = ('is_live', 'was_live')
2240         live_status = info_dict.get('live_status')
2241         if live_status is None:
2242             for key in live_keys:
2243                 if info_dict.get(key) is False:
2244                     continue
2245                 if info_dict.get(key):
2246                     live_status = key
2247                 break
2248             if all(info_dict.get(key) is False for key in live_keys):
2249                 live_status = 'not_live'
2250         if live_status:
2251             info_dict['live_status'] = live_status
2252             for key in live_keys:
2253                 if info_dict.get(key) is None:
2254                     info_dict[key] = (live_status == key)
2255
2256         # Auto generate title fields corresponding to the *_number fields when missing
2257         # in order to always have clean titles. This is very common for TV series.
2258         for field in ('chapter', 'season', 'episode'):
2259             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2260                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2261
2262         for cc_kind in ('subtitles', 'automatic_captions'):
2263             cc = info_dict.get(cc_kind)
2264             if cc:
2265                 for _, subtitle in cc.items():
2266                     for subtitle_format in subtitle:
2267                         if subtitle_format.get('url'):
2268                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2269                         if subtitle_format.get('ext') is None:
2270                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2271
2272         automatic_captions = info_dict.get('automatic_captions')
2273         subtitles = info_dict.get('subtitles')
2274
2275         info_dict['requested_subtitles'] = self.process_subtitles(
2276             info_dict['id'], subtitles, automatic_captions)
2277
2278         if info_dict.get('formats') is None:
2279             # There's only one format available
2280             formats = [info_dict]
2281         else:
2282             formats = info_dict['formats']
2283
2284         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2285         if not self.params.get('allow_unplayable_formats'):
2286             formats = [f for f in formats if not f.get('has_drm')]
2287
2288         if not formats:
2289             self.raise_no_formats(info_dict)
2290
2291         def is_wellformed(f):
2292             url = f.get('url')
2293             if not url:
2294                 self.report_warning(
2295                     '"url" field is missing or empty - skipping format, '
2296                     'there is an error in extractor')
2297                 return False
2298             if isinstance(url, bytes):
2299                 sanitize_string_field(f, 'url')
2300             return True
2301
2302         # Filter out malformed formats for better extraction robustness
2303         formats = list(filter(is_wellformed, formats))
2304
2305         formats_dict = {}
2306
2307         # We check that all the formats have the format and format_id fields
2308         for i, format in enumerate(formats):
2309             sanitize_string_field(format, 'format_id')
2310             sanitize_numeric_fields(format)
2311             format['url'] = sanitize_url(format['url'])
2312             if not format.get('format_id'):
2313                 format['format_id'] = compat_str(i)
2314             else:
2315                 # Sanitize format_id from characters used in format selector expression
2316                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2317             format_id = format['format_id']
2318             if format_id not in formats_dict:
2319                 formats_dict[format_id] = []
2320             formats_dict[format_id].append(format)
2321
2322         # Make sure all formats have unique format_id
2323         common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2324         for format_id, ambiguous_formats in formats_dict.items():
2325             ambigious_id = len(ambiguous_formats) > 1
2326             for i, format in enumerate(ambiguous_formats):
2327                 if ambigious_id:
2328                     format['format_id'] = '%s-%d' % (format_id, i)
2329                 if format.get('ext') is None:
2330                     format['ext'] = determine_ext(format['url']).lower()
2331                 # Ensure there is no conflict between id and ext in format selection
2332                 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2333                 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2334                     format['format_id'] = 'f%s' % format['format_id']
2335
2336         for i, format in enumerate(formats):
2337             if format.get('format') is None:
2338                 format['format'] = '{id} - {res}{note}'.format(
2339                     id=format['format_id'],
2340                     res=self.format_resolution(format),
2341                     note=format_field(format, 'format_note', ' (%s)'),
2342                 )
2343             if format.get('protocol') is None:
2344                 format['protocol'] = determine_protocol(format)
2345             if format.get('resolution') is None:
2346                 format['resolution'] = self.format_resolution(format, default=None)
2347             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2348                 format['dynamic_range'] = 'SDR'
2349             if (info_dict.get('duration') and format.get('tbr')
2350                     and not format.get('filesize') and not format.get('filesize_approx')):
2351                 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2352
2353             # Add HTTP headers, so that external programs can use them from the
2354             # json output
2355             full_format_info = info_dict.copy()
2356             full_format_info.update(format)
2357             format['http_headers'] = self._calc_headers(full_format_info)
2358         # Remove private housekeeping stuff
2359         if '__x_forwarded_for_ip' in info_dict:
2360             del info_dict['__x_forwarded_for_ip']
2361
2362         # TODO Central sorting goes here
2363
2364         if self.params.get('check_formats') is True:
2365             formats = LazyList(self._check_formats(formats[::-1])).reverse()
2366
2367         if not formats or formats[0] is not info_dict:
2368             # only set the 'formats' fields if the original info_dict list them
2369             # otherwise we end up with a circular reference, the first (and unique)
2370             # element in the 'formats' field in info_dict is info_dict itself,
2371             # which can't be exported to json
2372             info_dict['formats'] = formats
2373
2374         info_dict, _ = self.pre_process(info_dict)
2375
2376         # The pre-processors may have modified the formats
2377         formats = info_dict.get('formats', [info_dict])
2378
2379         if self.params.get('list_thumbnails'):
2380             self.list_thumbnails(info_dict)
2381         if self.params.get('listformats'):
2382             if not info_dict.get('formats') and not info_dict.get('url'):
2383                 self.to_screen('%s has no formats' % info_dict['id'])
2384             else:
2385                 self.list_formats(info_dict)
2386         if self.params.get('listsubtitles'):
2387             if 'automatic_captions' in info_dict:
2388                 self.list_subtitles(
2389                     info_dict['id'], automatic_captions, 'automatic captions')
2390             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2391         list_only = self.params.get('simulate') is None and (
2392             self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2393         if list_only:
2394             # Without this printing, -F --print-json will not work
2395             self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2396             return
2397
2398         format_selector = self.format_selector
2399         if format_selector is None:
2400             req_format = self._default_format_spec(info_dict, download=download)
2401             self.write_debug('Default format spec: %s' % req_format)
2402             format_selector = self.build_format_selector(req_format)
2403
2404         # While in format selection we may need to have an access to the original
2405         # format set in order to calculate some metrics or do some processing.
2406         # For now we need to be able to guess whether original formats provided
2407         # by extractor are incomplete or not (i.e. whether extractor provides only
2408         # video-only or audio-only formats) for proper formats selection for
2409         # extractors with such incomplete formats (see
2410         # https://github.com/ytdl-org/youtube-dl/pull/5556).
2411         # Since formats may be filtered during format selection and may not match
2412         # the original formats the results may be incorrect. Thus original formats
2413         # or pre-calculated metrics should be passed to format selection routines
2414         # as well.
2415         # We will pass a context object containing all necessary additional data
2416         # instead of just formats.
2417         # This fixes incorrect format selection issue (see
2418         # https://github.com/ytdl-org/youtube-dl/issues/10083).
2419         incomplete_formats = (
2420             # All formats are video-only or
2421             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2422             # all formats are audio-only
2423             or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2424
2425         ctx = {
2426             'formats': formats,
2427             'incomplete_formats': incomplete_formats,
2428         }
2429
2430         formats_to_download = list(format_selector(ctx))
2431         if not formats_to_download:
2432             if not self.params.get('ignore_no_formats_error'):
2433                 raise ExtractorError('Requested format is not available', expected=True,
2434                                      video_id=info_dict['id'], ie=info_dict['extractor'])
2435             else:
2436                 self.report_warning('Requested format is not available')
2437                 # Process what we can, even without any available formats.
2438                 self.process_info(dict(info_dict))
2439         elif download:
2440             self.to_screen(
2441                 '[info] %s: Downloading %d format(s): %s' % (
2442                     info_dict['id'], len(formats_to_download),
2443                     ", ".join([f['format_id'] for f in formats_to_download])))
2444             for fmt in formats_to_download:
2445                 new_info = dict(info_dict)
2446                 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2447                 new_info['__original_infodict'] = info_dict
2448                 new_info.update(fmt)
2449                 self.process_info(new_info)
2450         # We update the info dict with the selected best quality format (backwards compatibility)
2451         if formats_to_download:
2452             info_dict.update(formats_to_download[-1])
2453         return info_dict
2454
2455     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2456         """Select the requested subtitles and their format"""
2457         available_subs = {}
2458         if normal_subtitles and self.params.get('writesubtitles'):
2459             available_subs.update(normal_subtitles)
2460         if automatic_captions and self.params.get('writeautomaticsub'):
2461             for lang, cap_info in automatic_captions.items():
2462                 if lang not in available_subs:
2463                     available_subs[lang] = cap_info
2464
2465         if (not self.params.get('writesubtitles') and not
2466                 self.params.get('writeautomaticsub') or not
2467                 available_subs):
2468             return None
2469
2470         all_sub_langs = available_subs.keys()
2471         if self.params.get('allsubtitles', False):
2472             requested_langs = all_sub_langs
2473         elif self.params.get('subtitleslangs', False):
2474             # A list is used so that the order of languages will be the same as
2475             # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2476             requested_langs = []
2477             for lang_re in self.params.get('subtitleslangs'):
2478                 if lang_re == 'all':
2479                     requested_langs.extend(all_sub_langs)
2480                     continue
2481                 discard = lang_re[0] == '-'
2482                 if discard:
2483                     lang_re = lang_re[1:]
2484                 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2485                 if discard:
2486                     for lang in current_langs:
2487                         while lang in requested_langs:
2488                             requested_langs.remove(lang)
2489                 else:
2490                     requested_langs.extend(current_langs)
2491             requested_langs = orderedSet(requested_langs)
2492         elif 'en' in available_subs:
2493             requested_langs = ['en']
2494         else:
2495             requested_langs = [list(all_sub_langs)[0]]
2496         if requested_langs:
2497             self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2498
2499         formats_query = self.params.get('subtitlesformat', 'best')
2500         formats_preference = formats_query.split('/') if formats_query else []
2501         subs = {}
2502         for lang in requested_langs:
2503             formats = available_subs.get(lang)
2504             if formats is None:
2505                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2506                 continue
2507             for ext in formats_preference:
2508                 if ext == 'best':
2509                     f = formats[-1]
2510                     break
2511                 matches = list(filter(lambda f: f['ext'] == ext, formats))
2512                 if matches:
2513                     f = matches[-1]
2514                     break
2515             else:
2516                 f = formats[-1]
2517                 self.report_warning(
2518                     'No subtitle format found matching "%s" for language %s, '
2519                     'using %s' % (formats_query, lang, f['ext']))
2520             subs[lang] = f
2521         return subs
2522
2523     def __forced_printings(self, info_dict, filename, incomplete):
2524         def print_mandatory(field, actual_field=None):
2525             if actual_field is None:
2526                 actual_field = field
2527             if (self.params.get('force%s' % field, False)
2528                     and (not incomplete or info_dict.get(actual_field) is not None)):
2529                 self.to_stdout(info_dict[actual_field])
2530
2531         def print_optional(field):
2532             if (self.params.get('force%s' % field, False)
2533                     and info_dict.get(field) is not None):
2534                 self.to_stdout(info_dict[field])
2535
2536         info_dict = info_dict.copy()
2537         if filename is not None:
2538             info_dict['filename'] = filename
2539         if info_dict.get('requested_formats') is not None:
2540             # For RTMP URLs, also include the playpath
2541             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2542         elif 'url' in info_dict:
2543             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2544
2545         if self.params.get('forceprint') or self.params.get('forcejson'):
2546             self.post_extract(info_dict)
2547         for tmpl in self.params.get('forceprint', []):
2548             mobj = re.match(r'\w+(=?)$', tmpl)
2549             if mobj and mobj.group(1):
2550                 tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
2551             elif mobj:
2552                 tmpl = '%({})s'.format(tmpl)
2553             self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))
2554
2555         print_mandatory('title')
2556         print_mandatory('id')
2557         print_mandatory('url', 'urls')
2558         print_optional('thumbnail')
2559         print_optional('description')
2560         print_optional('filename')
2561         if self.params.get('forceduration') and info_dict.get('duration') is not None:
2562             self.to_stdout(formatSeconds(info_dict['duration']))
2563         print_mandatory('format')
2564
2565         if self.params.get('forcejson'):
2566             self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2567
2568     def dl(self, name, info, subtitle=False, test=False):
2569         if not info.get('url'):
2570             self.raise_no_formats(info, True)
2571
2572         if test:
2573             verbose = self.params.get('verbose')
2574             params = {
2575                 'test': True,
2576                 'quiet': self.params.get('quiet') or not verbose,
2577                 'verbose': verbose,
2578                 'noprogress': not verbose,
2579                 'nopart': True,
2580                 'skip_unavailable_fragments': False,
2581                 'keep_fragments': False,
2582                 'overwrites': True,
2583                 '_no_ytdl_file': True,
2584             }
2585         else:
2586             params = self.params
2587         fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2588         if not test:
2589             for ph in self._progress_hooks:
2590                 fd.add_progress_hook(ph)
2591             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2592             self.write_debug('Invoking downloader on "%s"' % urls)
2593
2594         new_info = copy.deepcopy(self._copy_infodict(info))
2595         if new_info.get('http_headers') is None:
2596             new_info['http_headers'] = self._calc_headers(new_info)
2597         return fd.download(name, new_info, subtitle)
2598
2599     def process_info(self, info_dict):
2600         """Process a single resolved IE result."""
2601
2602         assert info_dict.get('_type', 'video') == 'video'
2603
2604         max_downloads = self.params.get('max_downloads')
2605         if max_downloads is not None:
2606             if self._num_downloads >= int(max_downloads):
2607                 raise MaxDownloadsReached()
2608
2609         # TODO: backward compatibility, to be removed
2610         info_dict['fulltitle'] = info_dict['title']
2611
2612         if 'format' not in info_dict and 'ext' in info_dict:
2613             info_dict['format'] = info_dict['ext']
2614
2615         if self._match_entry(info_dict) is not None:
2616             return
2617
2618         self.post_extract(info_dict)
2619         self._num_downloads += 1
2620
2621         # info_dict['_filename'] needs to be set for backward compatibility
2622         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2623         temp_filename = self.prepare_filename(info_dict, 'temp')
2624         files_to_move = {}
2625
2626         # Forced printings
2627         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2628
2629         if self.params.get('simulate'):
2630             if self.params.get('force_write_download_archive', False):
2631                 self.record_download_archive(info_dict)
2632             # Do nothing else if in simulate mode
2633             return
2634
2635         if full_filename is None:
2636             return
2637         if not self._ensure_dir_exists(encodeFilename(full_filename)):
2638             return
2639         if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2640             return
2641
2642         if self._write_description('video', info_dict,
2643                                    self.prepare_filename(info_dict, 'description')) is None:
2644             return
2645
2646         sub_files = self._write_subtitles(info_dict, temp_filename)
2647         if sub_files is None:
2648             return
2649         files_to_move.update(dict(sub_files))
2650
2651         thumb_files = self._write_thumbnails(
2652             'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2653         if thumb_files is None:
2654             return
2655         files_to_move.update(dict(thumb_files))
2656
2657         infofn = self.prepare_filename(info_dict, 'infojson')
2658         _infojson_written = self._write_info_json('video', info_dict, infofn)
2659         if _infojson_written:
2660             info_dict['infojson_filename'] = infofn
2661             # For backward compatability, even though it was a private field
2662             info_dict['__infojson_filename'] = infofn
2663         elif _infojson_written is None:
2664             return
2665
2666         # Note: Annotations are deprecated
2667         annofn = None
2668         if self.params.get('writeannotations', False):
2669             annofn = self.prepare_filename(info_dict, 'annotation')
2670         if annofn:
2671             if not self._ensure_dir_exists(encodeFilename(annofn)):
2672                 return
2673             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2674                 self.to_screen('[info] Video annotations are already present')
2675             elif not info_dict.get('annotations'):
2676                 self.report_warning('There are no annotations to write.')
2677             else:
2678                 try:
2679                     self.to_screen('[info] Writing video annotations to: ' + annofn)
2680                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2681                         annofile.write(info_dict['annotations'])
2682                 except (KeyError, TypeError):
2683                     self.report_warning('There are no annotations to write.')
2684                 except (OSError, IOError):
2685                     self.report_error('Cannot write annotations file: ' + annofn)
2686                     return
2687
2688         # Write internet shortcut files
2689         def _write_link_file(link_type):
2690             if 'webpage_url' not in info_dict:
2691                 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2692                 return False
2693             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2694             if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2695                 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2696                 return True
2697             try:
2698                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2699                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2700                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2701                     template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
2702                     if link_type == 'desktop':
2703                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2704                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2705             except (OSError, IOError):
2706                 self.report_error(f'Cannot write internet shortcut {linkfn}')
2707                 return False
2708             return True
2709
2710         write_links = {
2711             'url': self.params.get('writeurllink'),
2712             'webloc': self.params.get('writewebloclink'),
2713             'desktop': self.params.get('writedesktoplink'),
2714         }
2715         if self.params.get('writelink'):
2716             link_type = ('webloc' if sys.platform == 'darwin'
2717                          else 'desktop' if sys.platform.startswith('linux')
2718                          else 'url')
2719             write_links[link_type] = True
2720
2721         if any(should_write and not _write_link_file(link_type)
2722                for link_type, should_write in write_links.items()):
2723             return
2724
2725         try:
2726             info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2727         except PostProcessingError as err:
2728             self.report_error('Preprocessing: %s' % str(err))
2729             return
2730
2731         must_record_download_archive = False
2732         if self.params.get('skip_download', False):
2733             info_dict['filepath'] = temp_filename
2734             info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2735             info_dict['__files_to_move'] = files_to_move
2736             info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2737         else:
2738             # Download
2739             info_dict.setdefault('__postprocessors', [])
2740             try:
2741
2742                 def existing_file(*filepaths):
2743                     ext = info_dict.get('ext')
2744                     final_ext = self.params.get('final_ext', ext)
2745                     existing_files = []
2746                     for file in orderedSet(filepaths):
2747                         if final_ext != ext:
2748                             converted = replace_extension(file, final_ext, ext)
2749                             if os.path.exists(encodeFilename(converted)):
2750                                 existing_files.append(converted)
2751                         if os.path.exists(encodeFilename(file)):
2752                             existing_files.append(file)
2753
2754                     if not existing_files or self.params.get('overwrites', False):
2755                         for file in orderedSet(existing_files):
2756                             self.report_file_delete(file)
2757                             os.remove(encodeFilename(file))
2758                         return None
2759
2760                     info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2761                     return existing_files[0]
2762
2763                 success = True
2764                 if info_dict.get('requested_formats') is not None:
2765
2766                     def compatible_formats(formats):
2767                         # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2768                         video_formats = [format for format in formats if format.get('vcodec') != 'none']
2769                         audio_formats = [format for format in formats if format.get('acodec') != 'none']
2770                         if len(video_formats) > 2 or len(audio_formats) > 2:
2771                             return False
2772
2773                         # Check extension
2774                         exts = set(format.get('ext') for format in formats)
2775                         COMPATIBLE_EXTS = (
2776                             set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2777                             set(('webm',)),
2778                         )
2779                         for ext_sets in COMPATIBLE_EXTS:
2780                             if ext_sets.issuperset(exts):
2781                                 return True
2782                         # TODO: Check acodec/vcodec
2783                         return False
2784
2785                     requested_formats = info_dict['requested_formats']
2786                     old_ext = info_dict['ext']
2787                     if self.params.get('merge_output_format') is None:
2788                         if not compatible_formats(requested_formats):
2789                             info_dict['ext'] = 'mkv'
2790                             self.report_warning(
2791                                 'Requested formats are incompatible for merge and will be merged into mkv')
2792                         if (info_dict['ext'] == 'webm'
2793                                 and info_dict.get('thumbnails')
2794                                 # check with type instead of pp_key, __name__, or isinstance
2795                                 # since we dont want any custom PPs to trigger this
2796                                 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
2797                             info_dict['ext'] = 'mkv'
2798                             self.report_warning(
2799                                 'webm doesn\'t support embedding a thumbnail, mkv will be used')
2800                     new_ext = info_dict['ext']
2801
2802                     def correct_ext(filename, ext=new_ext):
2803                         if filename == '-':
2804                             return filename
2805                         filename_real_ext = os.path.splitext(filename)[1][1:]
2806                         filename_wo_ext = (
2807                             os.path.splitext(filename)[0]
2808                             if filename_real_ext in (old_ext, new_ext)
2809                             else filename)
2810                         return '%s.%s' % (filename_wo_ext, ext)
2811
2812                     # Ensure filename always has a correct extension for successful merge
2813                     full_filename = correct_ext(full_filename)
2814                     temp_filename = correct_ext(temp_filename)
2815                     dl_filename = existing_file(full_filename, temp_filename)
2816                     info_dict['__real_download'] = False
2817
2818                     if dl_filename is not None:
2819                         self.report_file_already_downloaded(dl_filename)
2820                     elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
2821                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
2822                         success, real_download = self.dl(temp_filename, info_dict)
2823                         info_dict['__real_download'] = real_download
2824                     else:
2825                         downloaded = []
2826                         merger = FFmpegMergerPP(self)
2827                         if self.params.get('allow_unplayable_formats'):
2828                             self.report_warning(
2829                                 'You have requested merging of multiple formats '
2830                                 'while also allowing unplayable formats to be downloaded. '
2831                                 'The formats won\'t be merged to prevent data corruption.')
2832                         elif not merger.available:
2833                             self.report_warning(
2834                                 'You have requested merging of multiple formats but ffmpeg is not installed. '
2835                                 'The formats won\'t be merged.')
2836
2837                         if temp_filename == '-':
2838                             reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
2839                                       else 'but the formats are incompatible for simultaneous download' if merger.available
2840                                       else 'but ffmpeg is not installed')
2841                             self.report_warning(
2842                                 f'You have requested downloading multiple formats to stdout {reason}. '
2843                                 'The formats will be streamed one after the other')
2844                             fname = temp_filename
2845                         for f in requested_formats:
2846                             new_info = dict(info_dict)
2847                             del new_info['requested_formats']
2848                             new_info.update(f)
2849                             if temp_filename != '-':
2850                                 fname = prepend_extension(
2851                                     correct_ext(temp_filename, new_info['ext']),
2852                                     'f%s' % f['format_id'], new_info['ext'])
2853                                 if not self._ensure_dir_exists(fname):
2854                                     return
2855                                 f['filepath'] = fname
2856                                 downloaded.append(fname)
2857                             partial_success, real_download = self.dl(fname, new_info)
2858                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
2859                             success = success and partial_success
2860                         if merger.available and not self.params.get('allow_unplayable_formats'):
2861                             info_dict['__postprocessors'].append(merger)
2862                             info_dict['__files_to_merge'] = downloaded
2863                             # Even if there were no downloads, it is being merged only now
2864                             info_dict['__real_download'] = True
2865                         else:
2866                             for file in downloaded:
2867                                 files_to_move[file] = None
2868                 else:
2869                     # Just a single file
2870                     dl_filename = existing_file(full_filename, temp_filename)
2871                     if dl_filename is None or dl_filename == temp_filename:
2872                         # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
2873                         # So we should try to resume the download
2874                         success, real_download = self.dl(temp_filename, info_dict)
2875                         info_dict['__real_download'] = real_download
2876                     else:
2877                         self.report_file_already_downloaded(dl_filename)
2878
2879                 dl_filename = dl_filename or temp_filename
2880                 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2881
2882             except network_exceptions as err:
2883                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2884                 return
2885             except (OSError, IOError) as err:
2886                 raise UnavailableVideoError(err)
2887             except (ContentTooShortError, ) as err:
2888                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2889                 return
2890
2891             if success and full_filename != '-':
2892
2893                 def fixup():
2894                     do_fixup = True
2895                     fixup_policy = self.params.get('fixup')
2896                     vid = info_dict['id']
2897
2898                     if fixup_policy in ('ignore', 'never'):
2899                         return
2900                     elif fixup_policy == 'warn':
2901                         do_fixup = False
2902                     elif fixup_policy != 'force':
2903                         assert fixup_policy in ('detect_or_warn', None)
2904                         if not info_dict.get('__real_download'):
2905                             do_fixup = False
2906
2907                     def ffmpeg_fixup(cndn, msg, cls):
2908                         if not cndn:
2909                             return
2910                         if not do_fixup:
2911                             self.report_warning(f'{vid}: {msg}')
2912                             return
2913                         pp = cls(self)
2914                         if pp.available:
2915                             info_dict['__postprocessors'].append(pp)
2916                         else:
2917                             self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2918
2919                     stretched_ratio = info_dict.get('stretched_ratio')
2920                     ffmpeg_fixup(
2921                         stretched_ratio not in (1, None),
2922                         f'Non-uniform pixel ratio {stretched_ratio}',
2923                         FFmpegFixupStretchedPP)
2924
2925                     ffmpeg_fixup(
2926                         (info_dict.get('requested_formats') is None
2927                          and info_dict.get('container') == 'm4a_dash'
2928                          and info_dict.get('ext') == 'm4a'),
2929                         'writing DASH m4a. Only some players support this container',
2930                         FFmpegFixupM4aPP)
2931
2932                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
2933                     downloader = downloader.__name__ if downloader else None
2934                     ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
2935                                  'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
2936                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2937                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
2938
2939                 fixup()
2940                 try:
2941                     info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2942                 except PostProcessingError as err:
2943                     self.report_error('Postprocessing: %s' % str(err))
2944                     return
2945                 try:
2946                     for ph in self._post_hooks:
2947                         ph(info_dict['filepath'])
2948                 except Exception as err:
2949                     self.report_error('post hooks: %s' % str(err))
2950                     return
2951                 must_record_download_archive = True
2952
2953         if must_record_download_archive or self.params.get('force_write_download_archive', False):
2954             self.record_download_archive(info_dict)
2955         max_downloads = self.params.get('max_downloads')
2956         if max_downloads is not None and self._num_downloads >= int(max_downloads):
2957             raise MaxDownloadsReached()
2958
2959     def __download_wrapper(self, func):
2960         @functools.wraps(func)
2961         def wrapper(*args, **kwargs):
2962             try:
2963                 res = func(*args, **kwargs)
2964             except UnavailableVideoError as e:
2965                 self.report_error(e)
2966             except DownloadCancelled as e:
2967                 self.to_screen(f'[info] {e}')
2968                 raise
2969             else:
2970                 if self.params.get('dump_single_json', False):
2971                     self.post_extract(res)
2972                     self.to_stdout(json.dumps(self.sanitize_info(res)))
2973         return wrapper
2974
2975     def download(self, url_list):
2976         """Download a given list of URLs."""
2977         url_list = variadic(url_list)  # Passing a single URL is a common mistake
2978         outtmpl = self.outtmpl_dict['default']
2979         if (len(url_list) > 1
2980                 and outtmpl != '-'
2981                 and '%' not in outtmpl
2982                 and self.params.get('max_downloads') != 1):
2983             raise SameFileError(outtmpl)
2984
2985         for url in url_list:
2986             self.__download_wrapper(self.extract_info)(
2987                 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2988
2989         return self._download_retcode
2990
2991     def download_with_info_file(self, info_filename):
2992         with contextlib.closing(fileinput.FileInput(
2993                 [info_filename], mode='r',
2994                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2995             # FileInput doesn't have a read method, we can't call json.load
2996             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2997         try:
2998             self.__download_wrapper(self.process_ie_result)(info, download=True)
2999         except (DownloadError, EntryNotInPlaylist, ThrottledDownload) as e:
3000             if not isinstance(e, EntryNotInPlaylist):
3001                 self.to_stderr('\r')
3002             webpage_url = info.get('webpage_url')
3003             if webpage_url is not None:
3004                 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3005                 return self.download([webpage_url])
3006             else:
3007                 raise
3008         return self._download_retcode
3009
3010     @staticmethod
3011     def sanitize_info(info_dict, remove_private_keys=False):
3012         ''' Sanitize the infodict for converting to json '''
3013         if info_dict is None:
3014             return info_dict
3015         info_dict.setdefault('epoch', int(time.time()))
3016         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
3017         keep_keys = ['_type']  # Always keep this to facilitate load-info-json
3018         if remove_private_keys:
3019             remove_keys |= {
3020                 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
3021                 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3022             }
3023             empty_values = (None, {}, [], set(), tuple())
3024             reject = lambda k, v: k not in keep_keys and (
3025                 k.startswith('_') or k in remove_keys or v in empty_values)
3026         else:
3027             reject = lambda k, v: k in remove_keys
3028         filter_fn = lambda obj: (
3029             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
3030             else obj if not isinstance(obj, dict)
3031             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
3032         return filter_fn(info_dict)
3033
3034     @staticmethod
3035     def filter_requested_info(info_dict, actually_filter=True):
3036         ''' Alias of sanitize_info for backward compatibility '''
3037         return YoutubeDL.sanitize_info(info_dict, actually_filter)
3038
3039     def run_pp(self, pp, infodict):
3040         files_to_delete = []
3041         if '__files_to_move' not in infodict:
3042             infodict['__files_to_move'] = {}
3043         try:
3044             files_to_delete, infodict = pp.run(infodict)
3045         except PostProcessingError as e:
3046             # Must be True and not 'only_download'
3047             if self.params.get('ignoreerrors') is True:
3048                 self.report_error(e)
3049                 return infodict
3050             raise
3051
3052         if not files_to_delete:
3053             return infodict
3054         if self.params.get('keepvideo', False):
3055             for f in files_to_delete:
3056                 infodict['__files_to_move'].setdefault(f, '')
3057         else:
3058             for old_filename in set(files_to_delete):
3059                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3060                 try:
3061                     os.remove(encodeFilename(old_filename))
3062                 except (IOError, OSError):
3063                     self.report_warning('Unable to remove downloaded original file')
3064                 if old_filename in infodict['__files_to_move']:
3065                     del infodict['__files_to_move'][old_filename]
3066         return infodict
3067
3068     @staticmethod
3069     def post_extract(info_dict):
3070         def actual_post_extract(info_dict):
3071             if info_dict.get('_type') in ('playlist', 'multi_video'):
3072                 for video_dict in info_dict.get('entries', {}):
3073                     actual_post_extract(video_dict or {})
3074                 return
3075
3076             post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3077             extra = post_extractor().items()
3078             info_dict.update(extra)
3079             info_dict.pop('__post_extractor', None)
3080
3081             original_infodict = info_dict.get('__original_infodict') or {}
3082             original_infodict.update(extra)
3083             original_infodict.pop('__post_extractor', None)
3084
3085         actual_post_extract(info_dict or {})
3086
3087     def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3088         info = dict(ie_info)
3089         info['__files_to_move'] = files_to_move or {}
3090         for pp in self._pps[key]:
3091             info = self.run_pp(pp, info)
3092         return info, info.pop('__files_to_move', None)
3093
3094     def post_process(self, filename, ie_info, files_to_move=None):
3095         """Run all the postprocessors on the given file."""
3096         info = dict(ie_info)
3097         info['filepath'] = filename
3098         info['__files_to_move'] = files_to_move or {}
3099
3100         for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3101             info = self.run_pp(pp, info)
3102         info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3103         del info['__files_to_move']
3104         for pp in self._pps['after_move']:
3105             info = self.run_pp(pp, info)
3106         return info
3107
3108     def _make_archive_id(self, info_dict):
3109         video_id = info_dict.get('id')
3110         if not video_id:
3111             return
3112         # Future-proof against any change in case
3113         # and backwards compatibility with prior versions
3114         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
3115         if extractor is None:
3116             url = str_or_none(info_dict.get('url'))
3117             if not url:
3118                 return
3119             # Try to find matching extractor for the URL and take its ie_key
3120             for ie_key, ie in self._ies.items():
3121                 if ie.suitable(url):
3122                     extractor = ie_key
3123                     break
3124             else:
3125                 return
3126         return '%s %s' % (extractor.lower(), video_id)
3127
3128     def in_download_archive(self, info_dict):
3129         fn = self.params.get('download_archive')
3130         if fn is None:
3131             return False
3132
3133         vid_id = self._make_archive_id(info_dict)
3134         if not vid_id:
3135             return False  # Incomplete video information
3136
3137         return vid_id in self.archive
3138
3139     def record_download_archive(self, info_dict):
3140         fn = self.params.get('download_archive')
3141         if fn is None:
3142             return
3143         vid_id = self._make_archive_id(info_dict)
3144         assert vid_id
3145         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3146             archive_file.write(vid_id + '\n')
3147         self.archive.add(vid_id)
3148
3149     @staticmethod
3150     def format_resolution(format, default='unknown'):
3151         is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none'
3152         if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3153             return 'audio only'
3154         if format.get('resolution') is not None:
3155             return format['resolution']
3156         if format.get('width') and format.get('height'):
3157             res = '%dx%d' % (format['width'], format['height'])
3158         elif format.get('height'):
3159             res = '%sp' % format['height']
3160         elif format.get('width'):
3161             res = '%dx?' % format['width']
3162         elif is_images:
3163             return 'images'
3164         else:
3165             return default
3166         return f'{res} images' if is_images else res
3167
3168     def _format_note(self, fdict):
3169         res = ''
3170         if fdict.get('ext') in ['f4f', 'f4m']:
3171             res += '(unsupported) '
3172         if fdict.get('language'):
3173             if res:
3174                 res += ' '
3175             res += '[%s] ' % fdict['language']
3176         if fdict.get('format_note') is not None:
3177             res += fdict['format_note'] + ' '
3178         if fdict.get('tbr') is not None:
3179             res += '%4dk ' % fdict['tbr']
3180         if fdict.get('container') is not None:
3181             if res:
3182                 res += ', '
3183             res += '%s container' % fdict['container']
3184         if (fdict.get('vcodec') is not None
3185                 and fdict.get('vcodec') != 'none'):
3186             if res:
3187                 res += ', '
3188             res += fdict['vcodec']
3189             if fdict.get('vbr') is not None:
3190                 res += '@'
3191         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3192             res += 'video@'
3193         if fdict.get('vbr') is not None:
3194             res += '%4dk' % fdict['vbr']
3195         if fdict.get('fps') is not None:
3196             if res:
3197                 res += ', '
3198             res += '%sfps' % fdict['fps']
3199         if fdict.get('acodec') is not None:
3200             if res:
3201                 res += ', '
3202             if fdict['acodec'] == 'none':
3203                 res += 'video only'
3204             else:
3205                 res += '%-5s' % fdict['acodec']
3206         elif fdict.get('abr') is not None:
3207             if res:
3208                 res += ', '
3209             res += 'audio'
3210         if fdict.get('abr') is not None:
3211             res += '@%3dk' % fdict['abr']
3212         if fdict.get('asr') is not None:
3213             res += ' (%5dHz)' % fdict['asr']
3214         if fdict.get('filesize') is not None:
3215             if res:
3216                 res += ', '
3217             res += format_bytes(fdict['filesize'])
3218         elif fdict.get('filesize_approx') is not None:
3219             if res:
3220                 res += ', '
3221             res += '~' + format_bytes(fdict['filesize_approx'])
3222         return res
3223
3224     def _list_format_headers(self, *headers):
3225         if self.params.get('listformats_table', True) is not False:
3226             return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3227         return headers
3228
3229     def list_formats(self, info_dict):
3230         formats = info_dict.get('formats', [info_dict])
3231         new_format = self.params.get('listformats_table', True) is not False
3232         if new_format:
3233             tbr_digits = number_of_digits(max(f.get('tbr') or 0 for f in formats))
3234             vbr_digits = number_of_digits(max(f.get('vbr') or 0 for f in formats))
3235             abr_digits = number_of_digits(max(f.get('abr') or 0 for f in formats))
3236             delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3237             table = [
3238                 [
3239                     self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3240                     format_field(f, 'ext'),
3241                     self.format_resolution(f),
3242                     format_field(f, 'fps', '%3d'),
3243                     format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3244                     delim,
3245                     format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3246                     format_field(f, 'tbr', f'%{tbr_digits}dk'),
3247                     shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
3248                     delim,
3249                     format_field(f, 'vcodec', default='unknown').replace('none', ''),
3250                     format_field(f, 'vbr', f'%{vbr_digits}dk'),
3251                     format_field(f, 'acodec', default='unknown').replace('none', ''),
3252                     format_field(f, 'abr', f'%{abr_digits}dk'),
3253                     format_field(f, 'asr', '%5dHz'),
3254                     join_nonempty(
3255                         self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3256                         format_field(f, 'language', '[%s]'),
3257                         format_field(f, 'format_note'),
3258                         format_field(f, 'container', ignore=(None, f.get('ext'))),
3259                         delim=', '),
3260                 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3261             header_line = self._list_format_headers(
3262                 'ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', delim, ' FILESIZE', '  TBR', 'PROTO',
3263                 delim, 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO')
3264         else:
3265             table = [
3266                 [
3267                     format_field(f, 'format_id'),
3268                     format_field(f, 'ext'),
3269                     self.format_resolution(f),
3270                     self._format_note(f)]
3271                 for f in formats
3272                 if f.get('preference') is None or f['preference'] >= -1000]
3273             header_line = ['format code', 'extension', 'resolution', 'note']
3274
3275         self.to_screen(
3276             '[info] Available formats for %s:' % info_dict['id'])
3277         self.to_stdout(render_table(
3278             header_line, table,
3279             extraGap=(0 if new_format else 1),
3280             hideEmpty=new_format,
3281             delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
3282
3283     def list_thumbnails(self, info_dict):
3284         thumbnails = list(info_dict.get('thumbnails'))
3285         if not thumbnails:
3286             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3287             return
3288
3289         self.to_screen(
3290             '[info] Thumbnails for %s:' % info_dict['id'])
3291         self.to_stdout(render_table(
3292             self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3293             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3294
3295     def list_subtitles(self, video_id, subtitles, name='subtitles'):
3296         if not subtitles:
3297             self.to_screen('%s has no %s' % (video_id, name))
3298             return
3299         self.to_screen(
3300             'Available %s for %s:' % (name, video_id))
3301
3302         def _row(lang, formats):
3303             exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3304             if len(set(names)) == 1:
3305                 names = [] if names[0] == 'unknown' else names[:1]
3306             return [lang, ', '.join(names), ', '.join(exts)]
3307
3308         self.to_stdout(render_table(
3309             self._list_format_headers('Language', 'Name', 'Formats'),
3310             [_row(lang, formats) for lang, formats in subtitles.items()],
3311             hideEmpty=True))
3312
3313     def urlopen(self, req):
3314         """ Start an HTTP download """
3315         if isinstance(req, compat_basestring):
3316             req = sanitized_Request(req)
3317         return self._opener.open(req, timeout=self._socket_timeout)
3318
3319     def print_debug_header(self):
3320         if not self.params.get('verbose'):
3321             return
3322
3323         def get_encoding(stream):
3324             ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
3325             if not supports_terminal_sequences(stream):
3326                 ret += ' (No ANSI)'
3327             return ret
3328
3329         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3330             locale.getpreferredencoding(),
3331             sys.getfilesystemencoding(),
3332             get_encoding(self._screen_file), get_encoding(self._err_file),
3333             self.get_encoding())
3334
3335         logger = self.params.get('logger')
3336         if logger:
3337             write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3338             write_debug(encoding_str)
3339         else:
3340             write_string(f'[debug] {encoding_str}\n', encoding=None)
3341             write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3342
3343         source = detect_variant()
3344         write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
3345         if not _LAZY_LOADER:
3346             if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3347                 write_debug('Lazy loading extractors is forcibly disabled')
3348             else:
3349                 write_debug('Lazy loading extractors is disabled')
3350         if plugin_extractors or plugin_postprocessors:
3351             write_debug('Plugins: %s' % [
3352                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3353                 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3354         if self.params.get('compat_opts'):
3355             write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3356         try:
3357             sp = Popen(
3358                 ['git', 'rev-parse', '--short', 'HEAD'],
3359                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3360                 cwd=os.path.dirname(os.path.abspath(__file__)))
3361             out, err = sp.communicate_or_kill()
3362             out = out.decode().strip()
3363             if re.match('[0-9a-f]+', out):
3364                 write_debug('Git HEAD: %s' % out)
3365         except Exception:
3366             try:
3367                 sys.exc_clear()
3368             except Exception:
3369                 pass
3370
3371         def python_implementation():
3372             impl_name = platform.python_implementation()
3373             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3374                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3375             return impl_name
3376
3377         write_debug('Python version %s (%s %s) - %s' % (
3378             platform.python_version(),
3379             python_implementation(),
3380             platform.architecture()[0],
3381             platform_name()))
3382
3383         exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3384         ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3385         if ffmpeg_features:
3386             exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
3387
3388         exe_versions['rtmpdump'] = rtmpdump_version()
3389         exe_versions['phantomjs'] = PhantomJSwrapper._version()
3390         exe_str = ', '.join(
3391             f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3392         ) or 'none'
3393         write_debug('exe versions: %s' % exe_str)
3394
3395         from .downloader.websocket import has_websockets
3396         from .postprocessor.embedthumbnail import has_mutagen
3397         from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
3398
3399         lib_str = join_nonempty(
3400             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
3401             KEYRING_AVAILABLE and 'keyring',
3402             has_mutagen and 'mutagen',
3403             SQLITE_AVAILABLE and 'sqlite',
3404             has_websockets and 'websockets',
3405             delim=', ') or 'none'
3406         write_debug('Optional libraries: %s' % lib_str)
3407
3408         proxy_map = {}
3409         for handler in self._opener.handlers:
3410             if hasattr(handler, 'proxies'):
3411                 proxy_map.update(handler.proxies)
3412         write_debug(f'Proxy map: {proxy_map}')
3413
3414         # Not implemented
3415         if False and self.params.get('call_home'):
3416             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3417             write_debug('Public IP address: %s' % ipaddr)
3418             latest_version = self.urlopen(
3419                 'https://yt-dl.org/latest/version').read().decode('utf-8')
3420             if version_tuple(latest_version) > version_tuple(__version__):
3421                 self.report_warning(
3422                     'You are using an outdated version (newest version: %s)! '
3423                     'See https://yt-dl.org/update if you need help updating.' %
3424                     latest_version)
3425
3426     def _setup_opener(self):
3427         timeout_val = self.params.get('socket_timeout')
3428         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3429
3430         opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3431         opts_cookiefile = self.params.get('cookiefile')
3432         opts_proxy = self.params.get('proxy')
3433
3434         self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3435
3436         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3437         if opts_proxy is not None:
3438             if opts_proxy == '':
3439                 proxies = {}
3440             else:
3441                 proxies = {'http': opts_proxy, 'https': opts_proxy}
3442         else:
3443             proxies = compat_urllib_request.getproxies()
3444             # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3445             if 'http' in proxies and 'https' not in proxies:
3446                 proxies['https'] = proxies['http']
3447         proxy_handler = PerRequestProxyHandler(proxies)
3448
3449         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3450         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3451         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3452         redirect_handler = YoutubeDLRedirectHandler()
3453         data_handler = compat_urllib_request_DataHandler()
3454
3455         # When passing our own FileHandler instance, build_opener won't add the
3456         # default FileHandler and allows us to disable the file protocol, which
3457         # can be used for malicious purposes (see
3458         # https://github.com/ytdl-org/youtube-dl/issues/8227)
3459         file_handler = compat_urllib_request.FileHandler()
3460
3461         def file_open(*args, **kwargs):
3462             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3463         file_handler.file_open = file_open
3464
3465         opener = compat_urllib_request.build_opener(
3466             proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3467
3468         # Delete the default user-agent header, which would otherwise apply in
3469         # cases where our custom HTTP handler doesn't come into play
3470         # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3471         opener.addheaders = []
3472         self._opener = opener
3473
3474     def encode(self, s):
3475         if isinstance(s, bytes):
3476             return s  # Already encoded
3477
3478         try:
3479             return s.encode(self.get_encoding())
3480         except UnicodeEncodeError as err:
3481             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3482             raise
3483
3484     def get_encoding(self):
3485         encoding = self.params.get('encoding')
3486         if encoding is None:
3487             encoding = preferredencoding()
3488         return encoding
3489
3490     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3491         ''' Write infojson and returns True = written, False = skip, None = error '''
3492         if overwrite is None:
3493             overwrite = self.params.get('overwrites', True)
3494         if not self.params.get('writeinfojson'):
3495             return False
3496         elif not infofn:
3497             self.write_debug(f'Skipping writing {label} infojson')
3498             return False
3499         elif not self._ensure_dir_exists(infofn):
3500             return None
3501         elif not overwrite and os.path.exists(infofn):
3502             self.to_screen(f'[info] {label.title()} metadata is already present')
3503         else:
3504             self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3505             try:
3506                 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3507             except (OSError, IOError):
3508                 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3509                 return None
3510         return True
3511
3512     def _write_description(self, label, ie_result, descfn):
3513         ''' Write description and returns True = written, False = skip, None = error '''
3514         if not self.params.get('writedescription'):
3515             return False
3516         elif not descfn:
3517             self.write_debug(f'Skipping writing {label} description')
3518             return False
3519         elif not self._ensure_dir_exists(descfn):
3520             return None
3521         elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3522             self.to_screen(f'[info] {label.title()} description is already present')
3523         elif ie_result.get('description') is None:
3524             self.report_warning(f'There\'s no {label} description to write')
3525             return False
3526         else:
3527             try:
3528                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3529                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3530                     descfile.write(ie_result['description'])
3531             except (OSError, IOError):
3532                 self.report_error(f'Cannot write {label} description file {descfn}')
3533                 return None
3534         return True
3535
3536     def _write_subtitles(self, info_dict, filename):
3537         ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3538         ret = []
3539         subtitles = info_dict.get('requested_subtitles')
3540         if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3541             # subtitles download errors are already managed as troubles in relevant IE
3542             # that way it will silently go on when used with unsupporting IE
3543             return ret
3544
3545         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3546         if not sub_filename_base:
3547             self.to_screen('[info] Skipping writing video subtitles')
3548             return ret
3549         for sub_lang, sub_info in subtitles.items():
3550             sub_format = sub_info['ext']
3551             sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3552             sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3553             if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
3554                 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3555                 sub_info['filepath'] = sub_filename
3556                 ret.append((sub_filename, sub_filename_final))
3557                 continue
3558
3559             self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3560             if sub_info.get('data') is not None:
3561                 try:
3562                     # Use newline='' to prevent conversion of newline characters
3563                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
3564                     with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3565                         subfile.write(sub_info['data'])
3566                     sub_info['filepath'] = sub_filename
3567                     ret.append((sub_filename, sub_filename_final))
3568                     continue
3569                 except (OSError, IOError):
3570                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
3571                     return None
3572
3573             try:
3574                 sub_copy = sub_info.copy()
3575                 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3576                 self.dl(sub_filename, sub_copy, subtitle=True)
3577                 sub_info['filepath'] = sub_filename
3578                 ret.append((sub_filename, sub_filename_final))
3579             except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3580                 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3581                 continue
3582         return ret
3583
3584     def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3585         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3586         write_all = self.params.get('write_all_thumbnails', False)
3587         thumbnails, ret = [], []
3588         if write_all or self.params.get('writethumbnail', False):
3589             thumbnails = info_dict.get('thumbnails') or []
3590         multiple = write_all and len(thumbnails) > 1
3591
3592         if thumb_filename_base is None:
3593             thumb_filename_base = filename
3594         if thumbnails and not thumb_filename_base:
3595             self.write_debug(f'Skipping writing {label} thumbnail')
3596             return ret
3597
3598         for t in thumbnails[::-1]:
3599             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3600             thumb_display_id = f'{label} thumbnail {t["id"]}'
3601             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3602             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3603
3604             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3605                 ret.append((thumb_filename, thumb_filename_final))
3606                 t['filepath'] = thumb_filename
3607                 self.to_screen('[info] %s is already present' % (
3608                     thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3609             else:
3610                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3611                 try:
3612                     uf = self.urlopen(t['url'])
3613                     self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3614                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3615                         shutil.copyfileobj(uf, thumbf)
3616                     ret.append((thumb_filename, thumb_filename_final))
3617                     t['filepath'] = thumb_filename
3618                 except network_exceptions as err:
3619                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3620             if ret and not write_all:
3621                 break
3622         return ret